diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 458cf96e..4713d95b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -47,7 +47,7 @@ jobs: - name: Install stable toolchain run: | rustup set profile minimal - rustup override set 1.61.0 + rustup override set 1.65.0 - run: cargo check --lib --all-features diff --git a/html5ever/benches/html5ever.rs b/html5ever/benches/html5ever.rs index 5b5e1c9c..7d3b0a2a 100644 --- a/html5ever/benches/html5ever.rs +++ b/html5ever/benches/html5ever.rs @@ -15,7 +15,7 @@ struct Sink; impl TokenSink for Sink { type Handle = (); - fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { + fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> { // Don't use the token, but make sure we don't get // optimized out entirely. black_box(token); @@ -53,7 +53,7 @@ fn run_bench(c: &mut Criterion, name: &str) { c.bench_function(&test_name, move |b| { b.iter(|| { - let mut tok = Tokenizer::new(Sink, Default::default()); + let tok = Tokenizer::new(Sink, Default::default()); let buffer = BufferQueue::default(); // We are doing clone inside the bench function, this is not ideal, but possibly // necessary since our iterator consumes the underlying buffer. diff --git a/html5ever/examples/arena.rs b/html5ever/examples/arena.rs index acc05705..cfdb0416 100644 --- a/html5ever/examples/arena.rs +++ b/html5ever/examples/arena.rs @@ -24,7 +24,7 @@ fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<' let sink = Sink { arena, document: arena.alloc(Node::new(NodeData::Document)), - quirks_mode: QuirksMode::NoQuirks, + quirks_mode: Cell::new(QuirksMode::NoQuirks), }; parse_document(sink, Default::default()) @@ -41,7 +41,7 @@ type Link<'arena> = Cell>>; struct Sink<'arena> { arena: Arena<'arena>, document: Ref<'arena>, - quirks_mode: QuirksMode, + quirks_mode: Cell, } /// DOM node which contains links to other nodes in the tree. @@ -188,14 +188,14 @@ impl<'arena> TreeSink for Sink<'arena> { self.document } - fn parse_error(&mut self, _: Cow<'static, str>) {} + fn parse_error(&self, _: Cow<'static, str>) {} - fn get_document(&mut self) -> Ref<'arena> { + fn get_document(&self) -> Ref<'arena> { self.document } - fn set_quirks_mode(&mut self, mode: QuirksMode) { - self.quirks_mode = mode; + fn set_quirks_mode(&self, mode: QuirksMode) { + self.quirks_mode.set(mode); } fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool { @@ -209,7 +209,7 @@ impl<'arena> TreeSink for Sink<'arena> { } } - fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { + fn get_template_contents(&self, target: &Ref<'arena>) -> Ref<'arena> { if let NodeData::Element { template_contents: Some(contents), .. 
@@ -234,7 +234,7 @@ impl<'arena> TreeSink for Sink<'arena> { } fn create_element( - &mut self, + &self, name: QualName, attrs: Vec, flags: ElementFlags, @@ -251,18 +251,18 @@ impl<'arena> TreeSink for Sink<'arena> { }) } - fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> { + fn create_comment(&self, text: StrTendril) -> Ref<'arena> { self.new_node(NodeData::Comment { contents: text }) } - fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { + fn create_pi(&self, target: StrTendril, data: StrTendril) -> Ref<'arena> { self.new_node(NodeData::ProcessingInstruction { target, contents: data, }) } - fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText>) { + fn append(&self, parent: &Ref<'arena>, child: NodeOrText>) { self.append_common( child, || parent.last_child.get(), @@ -270,7 +270,7 @@ impl<'arena> TreeSink for Sink<'arena> { ) } - fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText>) { + fn append_before_sibling(&self, sibling: &Ref<'arena>, child: NodeOrText>) { self.append_common( child, || sibling.previous_sibling.get(), @@ -279,7 +279,7 @@ impl<'arena> TreeSink for Sink<'arena> { } fn append_based_on_parent_node( - &mut self, + &self, element: &Ref<'arena>, prev_element: &Ref<'arena>, child: NodeOrText>, @@ -292,7 +292,7 @@ impl<'arena> TreeSink for Sink<'arena> { } fn append_doctype_to_document( - &mut self, + &self, name: StrTendril, public_id: StrTendril, system_id: StrTendril, @@ -304,7 +304,7 @@ impl<'arena> TreeSink for Sink<'arena> { })) } - fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec) { + fn add_attrs_if_missing(&self, target: &Ref<'arena>, attrs: Vec) { let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { attrs.borrow_mut() } else { @@ -322,11 +322,11 @@ impl<'arena> TreeSink for Sink<'arena> { ); } - fn remove_from_parent(&mut self, target: &Ref<'arena>) { + fn remove_from_parent(&self, target: &Ref<'arena>) { target.detach() } - fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { + fn reparent_children(&self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { let mut next_child = node.first_child.get(); while let Some(child) = next_child { debug_assert!(ptr::eq::(child.parent.get().unwrap(), *node)); diff --git a/html5ever/examples/noop-tokenize.rs b/html5ever/examples/noop-tokenize.rs index c5100563..a95404df 100644 --- a/html5ever/examples/noop-tokenize.rs +++ b/html5ever/examples/noop-tokenize.rs @@ -11,20 +11,21 @@ extern crate html5ever; +use std::cell::RefCell; use std::io; use html5ever::tendril::*; use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; /// In our case, our sink only contains a tokens vector -struct Sink(Vec); +struct Sink(RefCell>); impl TokenSink for Sink { type Handle = (); /// Each processed token will be handled by this method - fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { - self.0.push(token); + fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> { + self.0.borrow_mut().push(token); TokenSinkResult::Continue } } @@ -39,7 +40,7 @@ fn main() { let input = BufferQueue::default(); input.push_back(chunk.try_reinterpret().unwrap()); - let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default()); + let tok = Tokenizer::new(Sink(RefCell::new(Vec::new())), Default::default()); let _ = tok.feed(&input); assert!(input.is_empty()); tok.end(); diff --git a/html5ever/examples/noop-tree-builder.rs 
b/html5ever/examples/noop-tree-builder.rs index 1baebf99..ab7fc531 100644 --- a/html5ever/examples/noop-tree-builder.rs +++ b/html5ever/examples/noop-tree-builder.rs @@ -11,6 +11,7 @@ extern crate html5ever; use std::borrow::Cow; +use std::cell::{Cell, RefCell}; use std::collections::HashMap; use std::io; @@ -20,14 +21,14 @@ use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; use html5ever::{Attribute, ExpandedName, QualName}; struct Sink { - next_id: usize, - names: HashMap, + next_id: Cell, + names: RefCell>, } impl Sink { - fn get_id(&mut self) -> usize { - let id = self.next_id; - self.next_id += 2; + fn get_id(&self) -> usize { + let id = self.next_id.get(); + self.next_id.set(id + 2); id } } @@ -43,12 +44,13 @@ impl TreeSink for Sink { self } - fn get_document(&mut self) -> usize { + fn get_document(&self) -> usize { 0 } - fn get_template_contents(&mut self, target: &usize) -> usize { - if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded()) + fn get_template_contents(&self, target: &usize) -> usize { + if let Some(expanded_name!(html "template")) = + self.names.borrow().get(target).map(|n| n.expanded()) { target + 1 } else { @@ -61,53 +63,63 @@ impl TreeSink for Sink { } fn elem_name(&self, target: &usize) -> ExpandedName { - self.names.get(target).expect("not an element").expanded() + self.names + .borrow() + .get(target) + .expect("not an element") + .expanded() } - fn create_element(&mut self, name: QualName, _: Vec, _: ElementFlags) -> usize { + fn create_element(&self, name: QualName, _: Vec, _: ElementFlags) -> usize { let id = self.get_id(); - self.names.insert(id, name); + // N.B. We intentionally leak memory here to minimize the implementation complexity + // of this example code. A real implementation would either want to use a real + // real DOM tree implentation, or else use an arena as the backing store for + // memory used by the parser. 
+ self.names + .borrow_mut() + .insert(id, Box::leak(Box::new(name))); id } - fn create_comment(&mut self, _text: StrTendril) -> usize { + fn create_comment(&self, _text: StrTendril) -> usize { self.get_id() } #[allow(unused_variables)] - fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize { + fn create_pi(&self, target: StrTendril, value: StrTendril) -> usize { unimplemented!() } - fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText) {} + fn append_before_sibling(&self, _sibling: &usize, _new_node: NodeOrText) {} fn append_based_on_parent_node( - &mut self, + &self, _element: &usize, _prev_element: &usize, _new_node: NodeOrText, ) { } - fn parse_error(&mut self, _msg: Cow<'static, str>) {} - fn set_quirks_mode(&mut self, _mode: QuirksMode) {} - fn append(&mut self, _parent: &usize, _child: NodeOrText) {} + fn parse_error(&self, _msg: Cow<'static, str>) {} + fn set_quirks_mode(&self, _mode: QuirksMode) {} + fn append(&self, _parent: &usize, _child: NodeOrText) {} - fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {} - fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec) { - assert!(self.names.contains_key(target), "not an element"); + fn append_doctype_to_document(&self, _: StrTendril, _: StrTendril, _: StrTendril) {} + fn add_attrs_if_missing(&self, target: &usize, _attrs: Vec) { + assert!(self.names.borrow().contains_key(target), "not an element"); } - fn remove_from_parent(&mut self, _target: &usize) {} - fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {} - fn mark_script_already_started(&mut self, _node: &usize) {} + fn remove_from_parent(&self, _target: &usize) {} + fn reparent_children(&self, _node: &usize, _new_parent: &usize) {} + fn mark_script_already_started(&self, _node: &usize) {} } /// In this example we implement the TreeSink trait which takes each parsed elements and insert /// it to a hashmap, while each element is given a numeric id. 
fn main() { let sink = Sink { - next_id: 1, - names: HashMap::new(), + next_id: Cell::new(1), + names: RefCell::new(HashMap::new()), }; // Read HTML from the standard input and parse it diff --git a/html5ever/examples/print-tree-actions.rs b/html5ever/examples/print-tree-actions.rs index 2fcf0ad8..104d6444 100644 --- a/html5ever/examples/print-tree-actions.rs +++ b/html5ever/examples/print-tree-actions.rs @@ -11,6 +11,7 @@ extern crate html5ever; use std::borrow::Cow; +use std::cell::{Cell, RefCell}; use std::collections::HashMap; use std::io; @@ -22,14 +23,14 @@ use html5ever::tree_builder::{ use html5ever::{Attribute, ExpandedName, QualName}; struct Sink { - next_id: usize, - names: HashMap, + next_id: Cell, + names: RefCell>, } impl Sink { - fn get_id(&mut self) -> usize { - let id = self.next_id; - self.next_id += 2; + fn get_id(&self) -> usize { + let id = self.next_id.get(); + self.next_id.set(id + 2); id } } @@ -41,16 +42,17 @@ impl TreeSink for Sink { self } - fn parse_error(&mut self, msg: Cow<'static, str>) { + fn parse_error(&self, msg: Cow<'static, str>) { println!("Parse error: {}", msg); } - fn get_document(&mut self) -> usize { + fn get_document(&self) -> usize { 0 } - fn get_template_contents(&mut self, target: &usize) -> usize { - if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded()) + fn get_template_contents(&self, target: &usize) -> usize { + if let Some(expanded_name!(html "template")) = + self.names.borrow().get(target).map(|n| n.expanded()) { target + 1 } else { @@ -58,7 +60,7 @@ impl TreeSink for Sink { } } - fn set_quirks_mode(&mut self, mode: QuirksMode) { + fn set_quirks_mode(&self, mode: QuirksMode) { println!("Set quirks mode to {:?}", mode); } @@ -67,35 +69,46 @@ impl TreeSink for Sink { } fn elem_name(&self, target: &usize) -> ExpandedName { - self.names.get(target).expect("not an element").expanded() + self.names + .borrow() + .get(target) + .cloned() + .expect("not an element") + .expanded() } - fn create_element(&mut self, name: QualName, _: Vec, _: ElementFlags) -> usize { + fn create_element(&self, name: QualName, _: Vec, _: ElementFlags) -> usize { let id = self.get_id(); println!("Created {:?} as {}", name, id); - self.names.insert(id, name); + // N.B. We intentionally leak memory here to minimize the implementation complexity + // of this example code. A real implementation would either want to use a real + // real DOM tree implentation, or else use an arena as the backing store for + // memory used by the parser. 
+ self.names + .borrow_mut() + .insert(id, Box::leak(Box::new(name))); id } - fn create_comment(&mut self, text: StrTendril) -> usize { + fn create_comment(&self, text: StrTendril) -> usize { let id = self.get_id(); println!("Created comment \"{}\" as {}", text.escape_default(), id); id } #[allow(unused_variables)] - fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize { + fn create_pi(&self, target: StrTendril, value: StrTendril) -> usize { unimplemented!() } - fn append(&mut self, parent: &usize, child: NodeOrText) { + fn append(&self, parent: &usize, child: NodeOrText) { match child { AppendNode(n) => println!("Append node {} to {}", n, parent), AppendText(t) => println!("Append text to {}: \"{}\"", parent, t.escape_default()), } } - fn append_before_sibling(&mut self, sibling: &usize, new_node: NodeOrText) { + fn append_before_sibling(&self, sibling: &usize, new_node: NodeOrText) { match new_node { AppendNode(n) => println!("Append node {} before {}", n, sibling), AppendText(t) => println!("Append text before {}: \"{}\"", sibling, t.escape_default()), @@ -103,7 +116,7 @@ impl TreeSink for Sink { } fn append_based_on_parent_node( - &mut self, + &self, element: &Self::Handle, _prev_element: &Self::Handle, child: NodeOrText, @@ -112,7 +125,7 @@ impl TreeSink for Sink { } fn append_doctype_to_document( - &mut self, + &self, name: StrTendril, public_id: StrTendril, system_id: StrTendril, @@ -120,8 +133,8 @@ impl TreeSink for Sink { println!("Append doctype: {} {} {}", name, public_id, system_id); } - fn add_attrs_if_missing(&mut self, target: &usize, attrs: Vec) { - assert!(self.names.contains_key(target), "not an element"); + fn add_attrs_if_missing(&self, target: &usize, attrs: Vec) { + assert!(self.names.borrow().contains_key(target), "not an element"); println!("Add missing attributes to {}:", target); for attr in attrs.into_iter() { println!(" {:?} = {}", attr.name, attr.value); @@ -129,7 +142,7 @@ impl TreeSink for Sink { } fn associate_with_form( - &mut self, + &self, _target: &usize, _form: &usize, _nodes: (&usize, Option<&usize>), @@ -137,23 +150,23 @@ impl TreeSink for Sink { // No form owner support. } - fn remove_from_parent(&mut self, target: &usize) { + fn remove_from_parent(&self, target: &usize) { println!("Remove {} from parent", target); } - fn reparent_children(&mut self, node: &usize, new_parent: &usize) { + fn reparent_children(&self, node: &usize, new_parent: &usize) { println!("Move children from {} to {}", node, new_parent); } - fn mark_script_already_started(&mut self, node: &usize) { + fn mark_script_already_started(&self, node: &usize) { println!("Mark script {} as already started", node); } - fn set_current_line(&mut self, line_number: u64) { + fn set_current_line(&self, line_number: u64) { println!("Set current line to {}", line_number); } - fn pop(&mut self, elem: &usize) { + fn pop(&self, elem: &usize) { println!("Popped element {}", elem); } } @@ -163,8 +176,8 @@ impl TreeSink for Sink { /// called. 
fn main() { let sink = Sink { - next_id: 1, - names: HashMap::new(), + next_id: Cell::new(1), + names: RefCell::new(HashMap::new()), }; let stdin = io::stdin(); parse_document(sink, Default::default()) diff --git a/html5ever/examples/tokenize.rs b/html5ever/examples/tokenize.rs index f262de17..912a6ce6 100644 --- a/html5ever/examples/tokenize.rs +++ b/html5ever/examples/tokenize.rs @@ -9,6 +9,7 @@ extern crate html5ever; +use std::cell::Cell; use std::io; use html5ever::tendril::*; @@ -18,22 +19,22 @@ use html5ever::tokenizer::{ ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts, }; -#[derive(Copy, Clone)] +#[derive(Clone)] struct TokenPrinter { - in_char_run: bool, + in_char_run: Cell, } impl TokenPrinter { - fn is_char(&mut self, is_char: bool) { - match (self.in_char_run, is_char) { + fn is_char(&self, is_char: bool) { + match (self.in_char_run.get(), is_char) { (false, true) => print!("CHAR : \""), (true, false) => println!("\""), _ => (), } - self.in_char_run = is_char; + self.in_char_run.set(is_char); } - fn do_char(&mut self, c: char) { + fn do_char(&self, c: char) { self.is_char(true); print!("{}", c.escape_default().collect::()); } @@ -42,7 +43,7 @@ impl TokenPrinter { impl TokenSink for TokenPrinter { type Handle = (); - fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { + fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> { match token { CharacterTokens(b) => { for c in b.chars() { @@ -84,7 +85,9 @@ impl TokenSink for TokenPrinter { /// In this example we implement the TokenSink trait in such a way that each token is printed. /// If a there's an error while processing a token it is printed as well. fn main() { - let mut sink = TokenPrinter { in_char_run: false }; + let sink = TokenPrinter { + in_char_run: Cell::new(false), + }; // Read HTML from standard input let mut chunk = ByteTendril::new(); @@ -93,7 +96,7 @@ fn main() { let input = BufferQueue::default(); input.push_back(chunk.try_reinterpret().unwrap()); - let mut tok = Tokenizer::new( + let tok = Tokenizer::new( sink, TokenizerOpts { profile: true, @@ -104,5 +107,5 @@ fn main() { assert!(input.is_empty()); tok.end(); - sink.is_char(false); + tok.sink.is_char(false); } diff --git a/html5ever/src/driver.rs b/html5ever/src/driver.rs index b178c469..6a151ee3 100644 --- a/html5ever/src/driver.rs +++ b/html5ever/src/driver.rs @@ -57,7 +57,7 @@ where /// /// If your input is bytes, use `Parser::from_utf8`. pub fn parse_fragment( - mut sink: Sink, + sink: Sink, opts: ParseOpts, context_name: QualName, context_attrs: Vec, @@ -65,7 +65,7 @@ pub fn parse_fragment( where Sink: TreeSink, { - let context_elem = create_element(&mut sink, context_name, context_attrs); + let context_elem = create_element(&sink, context_name, context_attrs); parse_fragment_for_element(sink, opts, context_elem, None) } @@ -116,7 +116,7 @@ impl TendrilSink for Parser { type Output = Sink::Output; - fn finish(mut self) -> Self::Output { + fn finish(self) -> Self::Output { // FIXME: Properly support somehow. while let TokenizerResult::Script(_) = self.tokenizer.feed(&self.input_buffer) {} assert!(self.input_buffer.is_empty()); diff --git a/html5ever/src/macros.rs b/html5ever/src/macros.rs index 558a4a95..a220a940 100644 --- a/html5ever/src/macros.rs +++ b/html5ever/src/macros.rs @@ -8,12 +8,10 @@ // except according to those terms. macro_rules! 
unwrap_or_else { - ($opt:expr, $else_block:block) => { - match $opt { - None => $else_block, - Some(x) => x, - } - }; + ($opt:expr, $else_block:block) => {{ + let Some(x) = $opt else { $else_block }; + x + }}; } macro_rules! unwrap_or_return { diff --git a/html5ever/src/tokenizer/char_ref/mod.rs b/html5ever/src/tokenizer/char_ref/mod.rs index af5d650e..2f3c6c66 100644 --- a/html5ever/src/tokenizer/char_ref/mod.rs +++ b/html5ever/src/tokenizer/char_ref/mod.rs @@ -114,7 +114,7 @@ impl CharRefTokenizer { impl CharRefTokenizer { pub(super) fn step( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, ) -> Status { if self.result.is_some() { @@ -134,7 +134,7 @@ impl CharRefTokenizer { fn do_begin( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, ) -> Status { match unwrap_or_return!(tokenizer.peek(input), Stuck) { @@ -155,7 +155,7 @@ impl CharRefTokenizer { fn do_octothorpe( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, ) -> Status { let c = unwrap_or_return!(tokenizer.peek(input), Stuck); @@ -176,7 +176,7 @@ impl CharRefTokenizer { fn do_numeric( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, base: u32, ) -> Status { @@ -206,7 +206,7 @@ impl CharRefTokenizer { fn do_numeric_semicolon( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, ) -> Status { match unwrap_or_return!(tokenizer.peek(input), Stuck) { @@ -220,7 +220,7 @@ impl CharRefTokenizer { fn unconsume_numeric( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, ) -> Status { let mut unconsume = StrTendril::from_char('#'); @@ -233,7 +233,7 @@ impl CharRefTokenizer { self.finish_none() } - fn finish_numeric(&mut self, tokenizer: &mut Tokenizer) -> Status { + fn finish_numeric(&mut self, tokenizer: &Tokenizer) -> Status { fn conv(n: u32) -> char { from_u32(n).expect("invalid char missed by error handling cases") } @@ -269,7 +269,7 @@ impl CharRefTokenizer { fn do_named( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, ) -> Status { // peek + discard skips over newline normalization, therefore making it easier to @@ -294,7 +294,7 @@ impl CharRefTokenizer { } } - fn emit_name_error(&mut self, tokenizer: &mut Tokenizer) { + fn emit_name_error(&mut self, tokenizer: &Tokenizer) { let msg = format_if!( tokenizer.opts.exact_errors, "Invalid character reference", @@ -310,7 +310,7 @@ impl CharRefTokenizer { fn finish_named( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, end_char: Option, ) -> Status { @@ -381,7 +381,7 @@ impl CharRefTokenizer { self.finish_none() } else { input.push_front(StrTendril::from_slice(&self.name_buf()[name_len..])); - tokenizer.ignore_lf = false; + tokenizer.ignore_lf.set(false); self.result = Some(CharRef { chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()], num_chars: if c2 == 0 { 1 } else { 2 }, @@ -394,7 +394,7 @@ impl CharRefTokenizer { fn do_bogus_name( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, ) -> Status { // peek + discard skips over newline normalization, therefore making it easier to @@ -413,7 +413,7 @@ impl CharRefTokenizer { pub(super) fn end_of_file( &mut self, - tokenizer: &mut Tokenizer, + tokenizer: &Tokenizer, input: &BufferQueue, ) { while self.result.is_none() { diff --git a/html5ever/src/tokenizer/interface.rs b/html5ever/src/tokenizer/interface.rs index 
bb9f89dc..edc6afb9 100644 --- a/html5ever/src/tokenizer/interface.rs +++ b/html5ever/src/tokenizer/interface.rs @@ -84,10 +84,10 @@ pub trait TokenSink { type Handle; /// Process a token. - fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult; + fn process_token(&self, token: Token, line_number: u64) -> TokenSinkResult; // Signal sink that tokenization reached the end. - fn end(&mut self) {} + fn end(&self) {} /// Used in the markup declaration open state. By default, this always /// returns false and thus all CDATA sections are tokenized as bogus diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index 5aa5ba55..0a632875 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -27,6 +27,7 @@ use log::{debug, trace}; use mac::format_if; use markup5ever::{namespace_url, ns, small_char_set}; use std::borrow::Cow::{self, Borrowed}; +use std::cell::{Cell, RefCell, RefMut}; use std::collections::BTreeMap; use std::mem; @@ -106,68 +107,68 @@ pub struct Tokenizer { pub sink: Sink, /// The abstract machine state as described in the spec. - state: states::State, + state: Cell, /// Are we at the end of the file, once buffers have been processed /// completely? This affects whether we will wait for lookahead or not. - at_eof: bool, + at_eof: Cell, /// Tokenizer for character references, if we're tokenizing /// one at the moment. - char_ref_tokenizer: Option>, + char_ref_tokenizer: RefCell>>, /// Current input character. Just consumed, may reconsume. - current_char: char, + current_char: Cell, /// Should we reconsume the current input character? - reconsume: bool, + reconsume: Cell, /// Did we just consume \r, translating it to \n? In that case we need /// to ignore the next character if it's \n. - ignore_lf: bool, + ignore_lf: Cell, /// Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the /// beginning of the stream. - discard_bom: bool, + discard_bom: Cell, /// Current tag kind. - current_tag_kind: TagKind, + current_tag_kind: Cell, /// Current tag name. - current_tag_name: StrTendril, + current_tag_name: RefCell, /// Current tag is self-closing? - current_tag_self_closing: bool, + current_tag_self_closing: Cell, /// Current tag attributes. - current_tag_attrs: Vec, + current_tag_attrs: RefCell>, /// Current attribute name. - current_attr_name: StrTendril, + current_attr_name: RefCell, /// Current attribute value. - current_attr_value: StrTendril, + current_attr_value: RefCell, /// Current comment. - current_comment: StrTendril, + current_comment: RefCell, /// Current doctype token. - current_doctype: Doctype, + current_doctype: RefCell, /// Last start tag name, for use in checking "appropriate end tag". - last_start_tag_name: Option, + last_start_tag_name: RefCell>, /// The "temporary buffer" mentioned in the spec. - temp_buf: StrTendril, + temp_buf: RefCell, /// Record of how many ns we spent in each state, if profiling is enabled. - state_profile: BTreeMap, + state_profile: RefCell>, /// Record of how many ns we spent in the token sink. 
- time_in_sink: u64, + time_in_sink: Cell, /// Track current line - current_line: u64, + current_line: Cell, } impl Tokenizer { @@ -182,36 +183,36 @@ impl Tokenizer { Tokenizer { opts, sink, - state, - char_ref_tokenizer: None, - at_eof: false, - current_char: '\0', - reconsume: false, - ignore_lf: false, - discard_bom, - current_tag_kind: StartTag, - current_tag_name: StrTendril::new(), - current_tag_self_closing: false, - current_tag_attrs: vec![], - current_attr_name: StrTendril::new(), - current_attr_value: StrTendril::new(), - current_comment: StrTendril::new(), - current_doctype: Doctype::default(), - last_start_tag_name: start_tag_name, - temp_buf: StrTendril::new(), - state_profile: BTreeMap::new(), - time_in_sink: 0, - current_line: 1, + state: Cell::new(state), + char_ref_tokenizer: RefCell::new(None), + at_eof: Cell::new(false), + current_char: Cell::new('\0'), + reconsume: Cell::new(false), + ignore_lf: Cell::new(false), + discard_bom: Cell::new(discard_bom), + current_tag_kind: Cell::new(StartTag), + current_tag_name: RefCell::new(StrTendril::new()), + current_tag_self_closing: Cell::new(false), + current_tag_attrs: RefCell::new(vec![]), + current_attr_name: RefCell::new(StrTendril::new()), + current_attr_value: RefCell::new(StrTendril::new()), + current_comment: RefCell::new(StrTendril::new()), + current_doctype: RefCell::new(Doctype::default()), + last_start_tag_name: RefCell::new(start_tag_name), + temp_buf: RefCell::new(StrTendril::new()), + state_profile: RefCell::new(BTreeMap::new()), + time_in_sink: Cell::new(0), + current_line: Cell::new(1), } } /// Feed an input string into the tokenizer. - pub fn feed(&mut self, input: &BufferQueue) -> TokenizerResult { + pub fn feed(&self, input: &BufferQueue) -> TokenizerResult { if input.is_empty() { return TokenizerResult::Done; } - if self.discard_bom { + if self.discard_bom.get() { if let Some(c) = input.peek() { if c == '\u{feff}' { input.next(); @@ -224,21 +225,21 @@ impl Tokenizer { self.run(input) } - pub fn set_plaintext_state(&mut self) { - self.state = states::Plaintext; + pub fn set_plaintext_state(&self) { + self.state.set(states::Plaintext); } - fn process_token(&mut self, token: Token) -> TokenSinkResult { + fn process_token(&self, token: Token) -> TokenSinkResult { if self.opts.profile { - let (ret, dt) = time!(self.sink.process_token(token, self.current_line)); - self.time_in_sink += dt; + let (ret, dt) = time!(self.sink.process_token(token, self.current_line.get())); + self.time_in_sink.set(self.time_in_sink.get() + dt); ret } else { - self.sink.process_token(token, self.current_line) + self.sink.process_token(token, self.current_line.get()) } } - fn process_token_and_continue(&mut self, token: Token) { + fn process_token_and_continue(&self, token: Token) { assert!(matches!( self.process_token(token), TokenSinkResult::Continue @@ -248,21 +249,21 @@ impl Tokenizer { //§ preprocessing-the-input-stream // Get the next input character, which might be the character // 'c' that we already consumed from the buffers. 
- fn get_preprocessed_char(&mut self, mut c: char, input: &BufferQueue) -> Option { - if self.ignore_lf { - self.ignore_lf = false; + fn get_preprocessed_char(&self, mut c: char, input: &BufferQueue) -> Option { + if self.ignore_lf.get() { + self.ignore_lf.set(false); if c == '\n' { - c = unwrap_or_return!(input.next(), None); + c = input.next()?; } } if c == '\r' { - self.ignore_lf = true; + self.ignore_lf.set(true); c = '\n'; } if c == '\n' { - self.current_line += 1; + self.current_line.set(self.current_line.get() + 1); } if self.opts.exact_errors @@ -277,16 +278,16 @@ impl Tokenizer { } trace!("got character {}", c); - self.current_char = c; + self.current_char.set(c); Some(c) } //§ tokenization // Get the next input character, if one is available. - fn get_char(&mut self, input: &BufferQueue) -> Option { - if self.reconsume { - self.reconsume = false; - Some(self.current_char) + fn get_char(&self, input: &BufferQueue) -> Option { + if self.reconsume.get() { + self.reconsume.set(false); + Some(self.current_char.get()) } else { input .next() @@ -294,12 +295,12 @@ impl Tokenizer { } } - fn pop_except_from(&mut self, input: &BufferQueue, set: SmallCharSet) -> Option { + fn pop_except_from(&self, input: &BufferQueue, set: SmallCharSet) -> Option { // Bail to the slow path for various corner cases. // This means that `FromSet` can contain characters not in the set! // It shouldn't matter because the fallback `FromSet` case should // always do the same thing as the `NotFromSet` case. - if self.opts.exact_errors || self.reconsume || self.ignore_lf { + if self.opts.exact_errors || self.reconsume.get() || self.ignore_lf.get() { return self.get_char(input).map(FromSet); } @@ -319,20 +320,20 @@ impl Tokenizer { // BufferQueue::eat. // // NB: this doesn't set the current input character. - fn eat(&mut self, input: &BufferQueue, pat: &str, eq: fn(&u8, &u8) -> bool) -> Option { - if self.ignore_lf { - self.ignore_lf = false; + fn eat(&self, input: &BufferQueue, pat: &str, eq: fn(&u8, &u8) -> bool) -> Option { + if self.ignore_lf.get() { + self.ignore_lf.set(false); if self.peek(input) == Some('\n') { self.discard_char(input); } } - input.push_front(mem::take(&mut self.temp_buf)); + input.push_front(mem::take(&mut self.temp_buf.borrow_mut())); match input.eat(pat, eq) { - None if self.at_eof => Some(false), + None if self.at_eof.get() => Some(false), None => { while let Some(data) = input.next() { - self.temp_buf.push_char(data); + self.temp_buf.borrow_mut().push_char(data); } None }, @@ -341,14 +342,14 @@ impl Tokenizer { } /// Run the state machine for as long as we can. 
- fn run(&mut self, input: &BufferQueue) -> TokenizerResult { + fn run(&self, input: &BufferQueue) -> TokenizerResult { if self.opts.profile { loop { - let state = self.state; - let old_sink = self.time_in_sink; + let state = self.state.get(); + let old_sink = self.time_in_sink.get(); let (run, mut dt) = time!(self.step(input)); - dt -= (self.time_in_sink - old_sink); - let new = match self.state_profile.get_mut(&state) { + dt -= (self.time_in_sink.get() - old_sink); + let new = match self.state_profile.borrow_mut().get_mut(&state) { Some(x) => { *x += dt; false @@ -357,7 +358,7 @@ impl Tokenizer { }; if new { // do this here because of borrow shenanigans - self.state_profile.insert(state, dt); + self.state_profile.borrow_mut().insert(state, dt); } match run { ProcessResult::Continue => (), @@ -377,28 +378,28 @@ impl Tokenizer { TokenizerResult::Done } - fn bad_char_error(&mut self) { + fn bad_char_error(&self) { let msg = format_if!( self.opts.exact_errors, "Bad character", "Saw {} in state {:?}", - self.current_char, - self.state + self.current_char.get(), + self.state.get() ); self.emit_error(msg); } - fn bad_eof_error(&mut self) { + fn bad_eof_error(&self) { let msg = format_if!( self.opts.exact_errors, "Unexpected EOF", "Saw EOF in state {:?}", - self.state + self.state.get() ); self.emit_error(msg); } - fn emit_char(&mut self, c: char) { + fn emit_char(&self, c: char) { self.process_token_and_continue(match c { '\0' => NullCharacterToken, _ => CharacterTokens(StrTendril::from_char(c)), @@ -406,178 +407,183 @@ impl Tokenizer { } // The string must not contain '\0'! - fn emit_chars(&mut self, b: StrTendril) { + fn emit_chars(&self, b: StrTendril) { self.process_token_and_continue(CharacterTokens(b)); } - fn emit_current_tag(&mut self) -> ProcessResult { + fn emit_current_tag(&self) -> ProcessResult { self.finish_attribute(); - let name = LocalName::from(&*self.current_tag_name); - self.current_tag_name.clear(); + let name = LocalName::from(&**self.current_tag_name.borrow()); + self.current_tag_name.borrow_mut().clear(); - match self.current_tag_kind { + match self.current_tag_kind.get() { StartTag => { - self.last_start_tag_name = Some(name.clone()); + *self.last_start_tag_name.borrow_mut() = Some(name.clone()); }, EndTag => { - if !self.current_tag_attrs.is_empty() { + if !self.current_tag_attrs.borrow().is_empty() { self.emit_error(Borrowed("Attributes on an end tag")); } - if self.current_tag_self_closing { + if self.current_tag_self_closing.get() { self.emit_error(Borrowed("Self-closing end tag")); } }, } let token = TagToken(Tag { - kind: self.current_tag_kind, + kind: self.current_tag_kind.get(), name, - self_closing: self.current_tag_self_closing, - attrs: std::mem::take(&mut self.current_tag_attrs), + self_closing: self.current_tag_self_closing.get(), + attrs: std::mem::take(&mut self.current_tag_attrs.borrow_mut()), }); match self.process_token(token) { TokenSinkResult::Continue => ProcessResult::Continue, TokenSinkResult::Plaintext => { - self.state = states::Plaintext; + self.state.set(states::Plaintext); ProcessResult::Continue }, TokenSinkResult::Script(node) => { - self.state = states::Data; + self.state.set(states::Data); ProcessResult::Script(node) }, TokenSinkResult::RawData(kind) => { - self.state = states::RawData(kind); + self.state.set(states::RawData(kind)); ProcessResult::Continue }, } } - fn emit_temp_buf(&mut self) { + fn emit_temp_buf(&self) { // FIXME: Make sure that clearing on emit is spec-compatible. 
- let buf = mem::take(&mut self.temp_buf); + let buf = mem::take(&mut *self.temp_buf.borrow_mut()); self.emit_chars(buf); } - fn clear_temp_buf(&mut self) { + fn clear_temp_buf(&self) { // Do this without a new allocation. - self.temp_buf.clear(); + self.temp_buf.borrow_mut().clear(); } - fn emit_current_comment(&mut self) { - let comment = mem::take(&mut self.current_comment); + fn emit_current_comment(&self) { + let comment = mem::take(&mut *self.current_comment.borrow_mut()); self.process_token_and_continue(CommentToken(comment)); } - fn discard_tag(&mut self) { - self.current_tag_name.clear(); - self.current_tag_self_closing = false; - self.current_tag_attrs = vec![]; + fn discard_tag(&self) { + self.current_tag_name.borrow_mut().clear(); + self.current_tag_self_closing.set(false); + *self.current_tag_attrs.borrow_mut() = vec![]; } - fn create_tag(&mut self, kind: TagKind, c: char) { + fn create_tag(&self, kind: TagKind, c: char) { self.discard_tag(); - self.current_tag_name.push_char(c); - self.current_tag_kind = kind; + self.current_tag_name.borrow_mut().push_char(c); + self.current_tag_kind.set(kind); } fn have_appropriate_end_tag(&self) -> bool { - match self.last_start_tag_name.as_ref() { - Some(last) => (self.current_tag_kind == EndTag) && (*self.current_tag_name == **last), + match self.last_start_tag_name.borrow().as_ref() { + Some(last) => { + (self.current_tag_kind.get() == EndTag) + && (**self.current_tag_name.borrow() == **last) + }, None => false, } } - fn create_attribute(&mut self, c: char) { + fn create_attribute(&self, c: char) { self.finish_attribute(); - self.current_attr_name.push_char(c); + self.current_attr_name.borrow_mut().push_char(c); } - fn finish_attribute(&mut self) { - if self.current_attr_name.is_empty() { + fn finish_attribute(&self) { + if self.current_attr_name.borrow().is_empty() { return; } // Check for a duplicate attribute. // FIXME: the spec says we should error as soon as the name is finished. let dup = { - let name = &*self.current_attr_name; + let name = &*self.current_attr_name.borrow(); self.current_tag_attrs + .borrow() .iter() - .any(|a| &*a.name.local == name) + .any(|a| *a.name.local == **name) }; if dup { self.emit_error(Borrowed("Duplicate attribute")); - self.current_attr_name.clear(); - self.current_attr_value.clear(); + self.current_attr_name.borrow_mut().clear(); + self.current_attr_value.borrow_mut().clear(); } else { - let name = LocalName::from(&*self.current_attr_name); - self.current_attr_name.clear(); - self.current_tag_attrs.push(Attribute { + let name = LocalName::from(&**self.current_attr_name.borrow()); + self.current_attr_name.borrow_mut().clear(); + self.current_tag_attrs.borrow_mut().push(Attribute { // The tree builder will adjust the namespace if necessary. // This only happens in foreign elements. 
name: QualName::new(None, ns!(), name), - value: mem::take(&mut self.current_attr_value), + value: mem::take(&mut self.current_attr_value.borrow_mut()), }); } } - fn emit_current_doctype(&mut self) { - let doctype = mem::take(&mut self.current_doctype); + fn emit_current_doctype(&self) { + let doctype = self.current_doctype.take(); self.process_token_and_continue(DoctypeToken(doctype)); } - fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option { + fn doctype_id(&self, kind: DoctypeIdKind) -> RefMut> { + let current_doctype = self.current_doctype.borrow_mut(); match kind { - Public => &mut self.current_doctype.public_id, - System => &mut self.current_doctype.system_id, + Public => RefMut::map(current_doctype, |d| &mut d.public_id), + System => RefMut::map(current_doctype, |d| &mut d.system_id), } } - fn clear_doctype_id(&mut self, kind: DoctypeIdKind) { - let id = self.doctype_id(kind); + fn clear_doctype_id(&self, kind: DoctypeIdKind) { + let mut id = self.doctype_id(kind); match *id { Some(ref mut s) => s.clear(), None => *id = Some(StrTendril::new()), } } - fn consume_char_ref(&mut self) { - self.char_ref_tokenizer = Some(Box::new(CharRefTokenizer::new(matches!( - self.state, + fn consume_char_ref(&self) { + *self.char_ref_tokenizer.borrow_mut() = Some(Box::new(CharRefTokenizer::new(matches!( + self.state.get(), states::AttributeValue(_) )))); } - fn emit_eof(&mut self) { + fn emit_eof(&self) { self.process_token_and_continue(EOFToken); } - fn peek(&mut self, input: &BufferQueue) -> Option { - if self.reconsume { - Some(self.current_char) + fn peek(&self, input: &BufferQueue) -> Option { + if self.reconsume.get() { + Some(self.current_char.get()) } else { input.peek() } } - fn discard_char(&mut self, input: &BufferQueue) { + fn discard_char(&self, input: &BufferQueue) { // peek() deals in un-processed characters (no newline normalization), while get_char() // does. // // since discard_char is supposed to be used in combination with peek(), discard_char must // discard a single raw input character, not a normalized newline. - if self.reconsume { - self.reconsume = false; + if self.reconsume.get() { + self.reconsume.set(false); } else { input.next(); } } - fn emit_error(&mut self, error: Cow<'static, str>) { + fn emit_error(&self, error: Cow<'static, str>) { self.process_token_and_continue(ParseError(error)); } } @@ -587,25 +593,25 @@ impl Tokenizer { macro_rules! 
shorthand ( ( $me:ident : emit $c:expr ) => ( $me.emit_char($c) ); ( $me:ident : create_tag $kind:ident $c:expr ) => ( $me.create_tag($kind, $c) ); - ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push_char($c) ); + ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.borrow_mut().push_char($c) ); ( $me:ident : discard_tag ) => ( $me.discard_tag() ); ( $me:ident : discard_char $input:expr ) => ( $me.discard_char($input) ); - ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push_char($c) ); + ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.borrow_mut().push_char($c) ); ( $me:ident : emit_temp ) => ( $me.emit_temp_buf() ); ( $me:ident : clear_temp ) => ( $me.clear_temp_buf() ); ( $me:ident : create_attr $c:expr ) => ( $me.create_attribute($c) ); - ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.push_char($c) ); - ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.push_char($c) ); - ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.push_tendril($c) ); - ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.push_char($c) ); - ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_slice($c) ); + ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.borrow_mut().push_char($c) ); + ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.borrow_mut().push_char($c) ); + ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.borrow_mut().push_tendril($c)); + ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.borrow_mut().push_char($c) ); + ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.borrow_mut().push_slice($c) ); ( $me:ident : emit_comment ) => ( $me.emit_current_comment() ); - ( $me:ident : clear_comment ) => ( $me.current_comment.clear() ); - ( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::default() ); - ( $me:ident : push_doctype_name $c:expr ) => ( option_push(&mut $me.current_doctype.name, $c) ); - ( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push($me.doctype_id($k), $c) ); + ( $me:ident : clear_comment ) => ( $me.current_comment.borrow_mut().clear() ); + ( $me:ident : create_doctype ) => ( *$me.current_doctype.borrow_mut() = Doctype::default() ); + ( $me:ident : push_doctype_name $c:expr ) => ( option_push(&mut $me.current_doctype.borrow_mut().name, $c) ); + ( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push(&mut $me.doctype_id($k), $c) ); ( $me:ident : clear_doctype_id $k:ident ) => ( $me.clear_doctype_id($k) ); - ( $me:ident : force_quirks ) => ( $me.current_doctype.force_quirks = true ); + ( $me:ident : force_quirks ) => ( $me.current_doctype.borrow_mut().force_quirks = true); ( $me:ident : emit_doctype ) => ( $me.emit_current_doctype() ); ( $me:ident : error ) => ( $me.bad_char_error() ); ( $me:ident : error_eof ) => ( $me.bad_eof_error() ); @@ -634,19 +640,19 @@ macro_rules! go ( // These can only come at the end. 
- ( $me:ident : to $s:ident ) => ({ $me.state = states::$s; return ProcessResult::Continue; }); - ( $me:ident : to $s:ident $k1:expr ) => ({ $me.state = states::$s($k1); return ProcessResult::Continue; }); - ( $me:ident : to $s:ident $k1:ident $k2:expr ) => ({ $me.state = states::$s($k1($k2)); return ProcessResult::Continue; }); + ( $me:ident : to $s:ident ) => ({ $me.state.set(states::$s); return ProcessResult::Continue; }); + ( $me:ident : to $s:ident $k1:expr ) => ({ $me.state.set(states::$s($k1)); return ProcessResult::Continue; }); + ( $me:ident : to $s:ident $k1:ident $k2:expr ) => ({ $me.state.set(states::$s($k1($k2))); return ProcessResult::Continue; }); - ( $me:ident : reconsume $s:ident ) => ({ $me.reconsume = true; go!($me: to $s); }); - ( $me:ident : reconsume $s:ident $k1:expr ) => ({ $me.reconsume = true; go!($me: to $s $k1); }); - ( $me:ident : reconsume $s:ident $k1:ident $k2:expr ) => ({ $me.reconsume = true; go!($me: to $s $k1 $k2); }); + ( $me:ident : reconsume $s:ident ) => ({ $me.reconsume.set(true); go!($me: to $s); }); + ( $me:ident : reconsume $s:ident $k1:expr ) => ({ $me.reconsume.set(true); go!($me: to $s $k1); }); + ( $me:ident : reconsume $s:ident $k1:ident $k2:expr ) => ({ $me.reconsume.set(true); go!($me: to $s $k1 $k2); }); ( $me:ident : consume_char_ref ) => ({ $me.consume_char_ref(); return ProcessResult::Continue; }); // We have a default next state after emitting a tag, but the sink can override. ( $me:ident : emit_tag $s:ident ) => ({ - $me.state = states::$s; + $me.state.set(states::$s); return $me.emit_current_tag(); }); @@ -693,13 +699,13 @@ impl Tokenizer { // Return true if we should be immediately re-invoked // (this just simplifies control flow vs. break / continue). #[allow(clippy::never_loop)] - fn step(&mut self, input: &BufferQueue) -> ProcessResult { - if self.char_ref_tokenizer.is_some() { + fn step(&self, input: &BufferQueue) -> ProcessResult { + if self.char_ref_tokenizer.borrow().is_some() { return self.step_char_ref_tokenizer(input); } trace!("processing in state {:?}", self.state); - match self.state { + match self.state.get() { //§ data-state states::Data => loop { match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) { @@ -876,7 +882,7 @@ impl Tokenizer { let c = get_char!(self, input); match c { '\t' | '\n' | '\x0C' | ' ' | '/' | '>' => { - let esc = if &*self.temp_buf == "script" { + let esc = if &**self.temp_buf.borrow() == "script" { DoubleEscaped } else { Escaped @@ -942,7 +948,7 @@ impl Tokenizer { let c = get_char!(self, input); match c { '\t' | '\n' | '\x0C' | ' ' | '/' | '>' => { - let esc = if &*self.temp_buf == "script" { + let esc = if &**self.temp_buf.borrow() == "script" { Escaped } else { DoubleEscaped @@ -1083,7 +1089,7 @@ impl Tokenizer { states::SelfClosingStartTag => loop { match get_char!(self, input) { '>' => { - self.current_tag_self_closing = true; + self.current_tag_self_closing.set(true); go!(self: emit_tag Data); }, _ => go!(self: error; reconsume BeforeAttributeName), @@ -1379,7 +1385,7 @@ impl Tokenizer { } } - fn step_char_ref_tokenizer(&mut self, input: &BufferQueue) -> ProcessResult { + fn step_char_ref_tokenizer(&self, input: &BufferQueue) -> ProcessResult { // FIXME HACK: Take and replace the tokenizer so we don't // double-mut-borrow self. This is why it's boxed. 
let mut tok = self.char_ref_tokenizer.take().unwrap(); @@ -1395,11 +1401,11 @@ impl Tokenizer { char_ref::Progress => ProcessResult::Continue, }; - self.char_ref_tokenizer = Some(tok); + *self.char_ref_tokenizer.borrow_mut() = Some(tok); progress } - fn process_char_ref(&mut self, char_ref: CharRef) { + fn process_char_ref(&self, char_ref: CharRef) { let CharRef { mut chars, mut num_chars, @@ -1412,21 +1418,21 @@ impl Tokenizer { for i in 0..num_chars { let c = chars[i as usize]; - match self.state { + match self.state.get() { states::Data | states::RawData(states::Rcdata) => go!(self: emit c), states::AttributeValue(_) => go!(self: push_value c), _ => panic!( "state {:?} should not be reachable in process_char_ref", - self.state + self.state.get() ), } } } /// Indicate that we have reached the end of the input. - pub fn end(&mut self) { + pub fn end(&self) { // Handle EOF in the char ref sub-tokenizer, if there is one. // Do this first because it might un-consume stuff. let input = BufferQueue::default(); @@ -1440,7 +1446,7 @@ impl Tokenizer { // Process all remaining buffered input. // If we're waiting for lookahead, we're not gonna get it. - self.at_eof = true; + self.at_eof.set(true); assert!(matches!(self.run(&input), TokenizerResult::Done)); assert!(input.is_empty()); @@ -1460,8 +1466,12 @@ impl Tokenizer { } fn dump_profile(&self) { - let mut results: Vec<(states::State, u64)> = - self.state_profile.iter().map(|(s, t)| (*s, *t)).collect(); + let mut results: Vec<(states::State, u64)> = self + .state_profile + .borrow() + .iter() + .map(|(s, t)| (*s, *t)) + .collect(); results.sort_by(|&(_, x), &(_, y)| y.cmp(&x)); let total: u64 = results @@ -1469,7 +1479,10 @@ impl Tokenizer { .map(|&(_, t)| t) .fold(0, ::std::ops::Add::add); println!("\nTokenizer profile, in nanoseconds"); - println!("\n{:12} total in token sink", self.time_in_sink); + println!( + "\n{:12} total in token sink", + self.time_in_sink.get() + ); println!("\n{:12} total in tokenizer", total); for (k, v) in results.into_iter() { @@ -1478,9 +1491,9 @@ impl Tokenizer { } } - fn eof_step(&mut self) -> ProcessResult { - debug!("processing EOF in state {:?}", self.state); - match self.state { + fn eof_step(&self) -> ProcessResult { + debug!("processing EOF in state {:?}", self.state.get()); + match self.state.get() { states::Data | states::RawData(Rcdata) | states::RawData(Rawtext) @@ -1582,7 +1595,7 @@ mod test { use super::interface::{TagToken, Token}; use markup5ever::buffer_queue::BufferQueue; - use std::mem; + use std::cell::RefCell; use crate::LocalName; @@ -1590,29 +1603,29 @@ mod test { // if current_line is being updated when process_token is called. The lines // vector is a collection of the line numbers that each token is on. 
struct LinesMatch { - tokens: Vec, - current_str: StrTendril, - lines: Vec<(Token, u64)>, + tokens: RefCell>, + current_str: RefCell, + lines: RefCell>, } impl LinesMatch { fn new() -> LinesMatch { LinesMatch { - tokens: vec![], - current_str: StrTendril::new(), - lines: vec![], + tokens: RefCell::new(vec![]), + current_str: RefCell::new(StrTendril::new()), + lines: RefCell::new(vec![]), } } - fn push(&mut self, token: Token, line_number: u64) { + fn push(&self, token: Token, line_number: u64) { self.finish_str(); - self.lines.push((token, line_number)); + self.lines.borrow_mut().push((token, line_number)); } - fn finish_str(&mut self) { - if self.current_str.len() > 0 { - let s = mem::take(&mut self.current_str); - self.tokens.push(CharacterTokens(s)); + fn finish_str(&self) { + if self.current_str.borrow().len() > 0 { + let s = self.current_str.take(); + self.tokens.borrow_mut().push(CharacterTokens(s)); } } } @@ -1620,18 +1633,14 @@ mod test { impl TokenSink for LinesMatch { type Handle = (); - fn process_token( - &mut self, - token: Token, - line_number: u64, - ) -> TokenSinkResult { + fn process_token(&self, token: Token, line_number: u64) -> TokenSinkResult { match token { CharacterTokens(b) => { - self.current_str.push_slice(&b); + self.current_str.borrow_mut().push_slice(&b); }, NullCharacterToken => { - self.current_str.push_char('\0'); + self.current_str.borrow_mut().push_char('\0'); }, ParseError(_) => { @@ -1664,14 +1673,14 @@ mod test { // numbers that each token is on fn tokenize(input: Vec, opts: TokenizerOpts) -> Vec<(Token, u64)> { let sink = LinesMatch::new(); - let mut tok = Tokenizer::new(sink, opts); + let tok = Tokenizer::new(sink, opts); let buffer = BufferQueue::default(); for chunk in input.into_iter() { buffer.push_back(chunk); let _ = tok.feed(&buffer); } tok.end(); - tok.sink.lines + tok.sink.lines.take() } // Create a tag token diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index 20f6fb71..fa1a3b9d 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -25,6 +25,7 @@ use crate::tokenizer::states as tok_state; use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult}; use std::borrow::Cow::Borrowed; +use std::cell::{Cell, Ref, RefCell, RefMut}; use std::collections::VecDeque; use std::iter::{Enumerate, Rev}; use std::mem; @@ -92,51 +93,51 @@ pub struct TreeBuilder { pub sink: Sink, /// Insertion mode. - mode: InsertionMode, + mode: Cell, /// Original insertion mode, used by Text and InTableText modes. - orig_mode: Option, + orig_mode: Cell>, /// Stack of template insertion modes. - template_modes: Vec, + template_modes: RefCell>, /// Pending table character tokens. - pending_table_text: Vec<(SplitStatus, StrTendril)>, + pending_table_text: RefCell>, /// Quirks mode as set by the parser. /// FIXME: can scripts etc. change this? - quirks_mode: QuirksMode, + quirks_mode: Cell, /// The document node, which is created by the sink. doc_handle: Handle, /// Stack of open elements, most recently added at end. - open_elems: Vec, + open_elems: RefCell>, /// List of active formatting elements. - active_formatting: Vec>, + active_formatting: RefCell>>, //§ the-element-pointers /// Head element pointer. - head_elem: Option, + head_elem: RefCell>, /// Form element pointer. - form_elem: Option, + form_elem: RefCell>, //§ END /// Frameset-ok flag. - frameset_ok: bool, + frameset_ok: Cell, /// Ignore a following U+000A LINE FEED? 
- ignore_lf: bool, + ignore_lf: Cell, /// Is foster parenting enabled? - foster_parenting: bool, + foster_parenting: Cell, /// The context element for the fragment parsing algorithm. - context_elem: Option, + context_elem: RefCell>, /// Track current line - current_line: u64, + current_line: Cell, // WARNING: If you add new fields that contain Handles, you // must add them to trace_handles() below to preserve memory // safety! @@ -157,21 +158,21 @@ where TreeBuilder { opts: opts, sink: sink, - mode: Initial, - orig_mode: None, - template_modes: vec![], - pending_table_text: vec![], - quirks_mode: opts.quirks_mode, + mode: Cell::new(Initial), + orig_mode: Cell::new(None), + template_modes: Default::default(), + pending_table_text: Default::default(), + quirks_mode: Cell::new(opts.quirks_mode), doc_handle: doc_handle, - open_elems: vec![], - active_formatting: vec![], - head_elem: None, - form_elem: None, - frameset_ok: true, - ignore_lf: false, - foster_parenting: false, - context_elem: None, - current_line: 1, + open_elems: Default::default(), + active_formatting: Default::default(), + head_elem: Default::default(), + form_elem: Default::default(), + frameset_ok: Cell::new(true), + ignore_lf: Default::default(), + foster_parenting: Default::default(), + context_elem: Default::default(), + current_line: Cell::new(1), } } @@ -190,25 +191,25 @@ where let mut tb = TreeBuilder { opts: opts, sink: sink, - mode: Initial, - orig_mode: None, - template_modes: if context_is_template { + mode: Cell::new(Initial), + orig_mode: Cell::new(None), + template_modes: RefCell::new(if context_is_template { vec![InTemplate] } else { vec![] - }, - pending_table_text: vec![], - quirks_mode: opts.quirks_mode, + }), + pending_table_text: Default::default(), + quirks_mode: Cell::new(opts.quirks_mode), doc_handle: doc_handle, - open_elems: vec![], - active_formatting: vec![], - head_elem: None, - form_elem: form_elem, - frameset_ok: true, - ignore_lf: false, - foster_parenting: false, - context_elem: Some(context_elem), - current_line: 1, + open_elems: Default::default(), + active_formatting: Default::default(), + head_elem: Default::default(), + form_elem: RefCell::new(form_elem), + frameset_ok: Cell::new(true), + ignore_lf: Default::default(), + foster_parenting: Default::default(), + context_elem: RefCell::new(Some(context_elem)), + current_line: Cell::new(1), }; // https://html.spec.whatwg.org/multipage/#parsing-html-fragments @@ -217,7 +218,8 @@ where // 7. Set up the parser's stack of open elements so that it contains just the single element root. tb.create_root(vec![]); // 10. Reset the parser's insertion mode appropriately. - tb.mode = tb.reset_insertion_mode(); + let old_insertion_mode = tb.reset_insertion_mode(); + tb.mode.set(old_insertion_mode); tb } @@ -225,7 +227,8 @@ where // https://html.spec.whatwg.org/multipage/#concept-frag-parse-context // Step 4. Set the state of the HTML parser's tokenization stage as follows: pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State { - let elem = self.context_elem.as_ref().expect("no context element"); + let context_elem = self.context_elem.borrow(); + let elem = context_elem.as_ref().expect("no context element"); let name = match self.sink.elem_name(elem) { ExpandedName { ns: &ns!(html), @@ -262,25 +265,34 @@ where /// internal state. This is intended to support garbage-collected DOMs. 
pub fn trace_handles(&self, tracer: &dyn Tracer) { tracer.trace_handle(&self.doc_handle); - for e in &self.open_elems { + for e in &*self.open_elems.borrow() { tracer.trace_handle(e); } - for e in &self.active_formatting { + for e in &*self.active_formatting.borrow() { match e { &Element(ref h, _) => tracer.trace_handle(h), _ => (), } } - self.head_elem.as_ref().map(|h| tracer.trace_handle(h)); - self.form_elem.as_ref().map(|h| tracer.trace_handle(h)); - self.context_elem.as_ref().map(|h| tracer.trace_handle(h)); + self.head_elem + .borrow() + .as_ref() + .map(|h| tracer.trace_handle(h)); + self.form_elem + .borrow() + .as_ref() + .map(|h| tracer.trace_handle(h)); + self.context_elem + .borrow() + .as_ref() + .map(|h| tracer.trace_handle(h)); } #[allow(dead_code)] fn dump_state(&self, label: String) { println!("dump_state on {}", label); print!(" open_elems:"); - for node in self.open_elems.iter() { + for node in self.open_elems.borrow().iter() { let name = self.sink.elem_name(node); match *name.ns { ns!(html) => print!(" {}", name.local), @@ -289,7 +301,7 @@ where } println!(""); print!(" active_formatting:"); - for entry in self.active_formatting.iter() { + for entry in self.active_formatting.borrow().iter() { match entry { &Marker => print!(" Marker"), &Element(ref h, _) => { @@ -314,7 +326,7 @@ where } } - fn process_to_completion(&mut self, mut token: Token) -> TokenSinkResult { + fn process_to_completion(&self, mut token: Token) -> TokenSinkResult { // Queue of additional tokens yet to be processed. // This stays empty in the common case where we don't split whitespace. let mut more_tokens = VecDeque::new(); @@ -331,7 +343,7 @@ where let result = if self.is_foreign(&token) { self.step_foreign(token) } else { - let mode = self.mode; + let mode = self.mode.get(); self.step(mode, token) }; match result { @@ -352,7 +364,7 @@ where ); }, Reprocess(m, t) => { - self.mode = m; + self.mode.set(m); token = t; }, ReprocessForeign(t) => { @@ -386,19 +398,19 @@ where /// Are we parsing a HTML fragment? pub fn is_fragment(&self) -> bool { - self.context_elem.is_some() + self.context_elem.borrow().is_some() } /// https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node fn appropriate_place_for_insertion( - &mut self, + &self, override_target: Option, ) -> InsertionPoint { use self::tag_sets::*; declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr"); let target = override_target.unwrap_or_else(|| self.current_node().clone()); - if !(self.foster_parenting && self.elem_in(&target, foster_target)) { + if !(self.foster_parenting.get() && self.elem_in(&target, foster_target)) { if self.html_elem_named(&target, local_name!("template")) { // No foster parenting (inside template). 
let contents = self.sink.get_template_contents(&target); @@ -410,7 +422,8 @@ where } // Foster parenting - let mut iter = self.open_elems.iter().rev().peekable(); + let open_elems = self.open_elems.borrow(); + let mut iter = open_elems.iter().rev().peekable(); while let Some(elem) = iter.next() { if self.html_elem_named(&elem, local_name!("template")) { let contents = self.sink.get_template_contents(&elem); @@ -426,7 +439,7 @@ where LastChild(html_elem.clone()) } - fn insert_at(&mut self, insertion_point: InsertionPoint, child: NodeOrText) { + fn insert_at(&self, insertion_point: InsertionPoint, child: NodeOrText) { match insertion_point { LastChild(parent) => self.sink.append(&parent, child), BeforeSibling(sibling) => self.sink.append_before_sibling(&sibling, child), @@ -447,15 +460,11 @@ where { type Handle = Handle; - fn process_token( - &mut self, - token: tokenizer::Token, - line_number: u64, - ) -> TokenSinkResult { - if line_number != self.current_line { + fn process_token(&self, token: tokenizer::Token, line_number: u64) -> TokenSinkResult { + if line_number != self.current_line.get() { self.sink.set_current_line(line_number); } - let ignore_lf = mem::take(&mut self.ignore_lf); + let ignore_lf = self.ignore_lf.take(); // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type. let token = match token { @@ -465,7 +474,7 @@ where }, tokenizer::DoctypeToken(dt) => { - if self.mode == Initial { + if self.mode.get() == Initial { let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc); if err { self.sink.parse_error(format_if!( @@ -490,14 +499,14 @@ where } self.set_quirks_mode(quirk); - self.mode = BeforeHtml; + self.mode.set(BeforeHtml); return tokenizer::TokenSinkResult::Continue; } else { self.sink.parse_error(format_if!( self.opts.exact_errors, "DOCTYPE in body", "DOCTYPE in insertion mode {:?}", - self.mode + self.mode.get() )); return tokenizer::TokenSinkResult::Continue; } @@ -522,15 +531,15 @@ where self.process_to_completion(token) } - fn end(&mut self) { - for elem in self.open_elems.drain(..).rev() { + fn end(&self) { + for elem in self.open_elems.borrow_mut().drain(..).rev() { self.sink.pop(&elem); } } fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool { - !self.open_elems.is_empty() - && self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html) + !self.open_elems.borrow().is_empty() + && self.sink.elem_name(&self.adjusted_current_node()).ns != &ns!(html) } } @@ -538,6 +547,18 @@ pub fn html_elem(open_elems: &[Handle]) -> &Handle { &open_elems[0] } +struct ActiveFormattingView<'a, Handle: 'a> { + data: Ref<'a, Vec>>, +} + +impl<'a, Handle: 'a> ActiveFormattingView<'a, Handle> { + fn iter(&'a self) -> impl Iterator + 'a { + ActiveFormattingIter { + iter: self.data.iter().enumerate().rev(), + } + } +} + pub struct ActiveFormattingIter<'a, Handle: 'a> { iter: Rev>>>, } @@ -585,42 +606,45 @@ where Handle: Clone, Sink: TreeSink, { - fn unexpected(&mut self, _thing: &T) -> ProcessResult { + fn unexpected(&self, _thing: &T) -> ProcessResult { self.sink.parse_error(format_if!( self.opts.exact_errors, "Unexpected token", "Unexpected token {} in insertion mode {:?}", to_escaped_string(_thing), - self.mode + self.mode.get() )); Done } - fn assert_named(&mut self, node: &Handle, name: LocalName) { + fn assert_named(&self, node: &Handle, name: LocalName) { assert!(self.html_elem_named(&node, name)); } /// Iterate over the active formatting elements (with index in the list) from the end /// to the 
last marker, or the beginning if there are no markers. - fn active_formatting_end_to_marker<'a>(&'a self) -> ActiveFormattingIter<'a, Handle> { - ActiveFormattingIter { - iter: self.active_formatting.iter().enumerate().rev(), + fn active_formatting_end_to_marker<'a>(&'a self) -> ActiveFormattingView<'a, Handle> { + ActiveFormattingView { + data: Ref::map(self.active_formatting.borrow(), |a| &*a), } } fn position_in_active_formatting(&self, element: &Handle) -> Option { - self.active_formatting.iter().position(|n| match n { - &Marker => false, - &Element(ref handle, _) => self.sink.same_node(handle, element), - }) + self.active_formatting + .borrow() + .iter() + .position(|n| match n { + &Marker => false, + &Element(ref handle, _) => self.sink.same_node(handle, element), + }) } - fn set_quirks_mode(&mut self, mode: QuirksMode) { - self.quirks_mode = mode; + fn set_quirks_mode(&self, mode: QuirksMode) { + self.quirks_mode.set(mode); self.sink.set_quirks_mode(mode); } - fn stop_parsing(&mut self) -> ProcessResult { + fn stop_parsing(&self) -> ProcessResult { Done } @@ -629,26 +653,30 @@ where // switch the tokenizer to a raw-data state. // The latter only takes effect after the current / next // `process_token` of a start tag returns! - fn to_raw_text_mode(&mut self, k: RawKind) -> ProcessResult { - self.orig_mode = Some(self.mode); - self.mode = Text; + fn to_raw_text_mode(&self, k: RawKind) -> ProcessResult { + self.orig_mode.set(Some(self.mode.get())); + self.mode.set(Text); ToRawData(k) } // The generic raw text / RCDATA parsing algorithm. - fn parse_raw_data(&mut self, tag: Tag, k: RawKind) -> ProcessResult { + fn parse_raw_data(&self, tag: Tag, k: RawKind) -> ProcessResult { self.insert_element_for(tag); self.to_raw_text_mode(k) } //§ END - fn current_node(&self) -> &Handle { - self.open_elems.last().expect("no current element") + fn current_node(&self) -> Ref { + Ref::map(self.open_elems.borrow(), |elems| { + elems.last().expect("no current element") + }) } - fn adjusted_current_node(&self) -> &Handle { - if self.open_elems.len() == 1 { - if let Some(ctx) = self.context_elem.as_ref() { + fn adjusted_current_node(&self) -> Ref { + if self.open_elems.borrow().len() == 1 { + let context_elem = self.context_elem.borrow(); + let ctx = Ref::filter_map(context_elem, |e| e.as_ref()); + if let Ok(ctx) = ctx { return ctx; } } @@ -659,20 +687,20 @@ where where TagSet: Fn(ExpandedName) -> bool, { - set(self.sink.elem_name(self.current_node())) + set(self.sink.elem_name(&self.current_node())) } // Insert at the "appropriate place for inserting a node". - fn insert_appropriately(&mut self, child: NodeOrText, override_target: Option) { + fn insert_appropriately(&self, child: NodeOrText, override_target: Option) { let insertion_point = self.appropriate_place_for_insertion(override_target); self.insert_at(insertion_point, child); } - fn adoption_agency(&mut self, subject: LocalName) { + fn adoption_agency(&self, subject: LocalName) { // 1. if self.current_node_named(subject.clone()) { if self - .position_in_active_formatting(self.current_node()) + .position_in_active_formatting(&self.current_node()) .is_none() { self.pop(); @@ -686,6 +714,7 @@ where let (fmt_elem_index, fmt_elem, fmt_elem_tag) = unwrap_or_return!( // We clone the Handle and Tag so they don't cause an immutable borrow of self. 
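
`Ref::map` and `Ref::filter_map` are what let `current_node` and `adjusted_current_node` keep returning borrowed handles now that the stack lives in a `RefCell`. A standalone sketch of the same shape, with a placeholder element type rather than the real `Handle`:

```rust
use std::cell::{Ref, RefCell};

struct Stack {
    items: RefCell<Vec<String>>,
}

impl Stack {
    // Borrow the whole Vec, then narrow the guard to its last element.
    fn current(&self) -> Ref<'_, String> {
        Ref::map(self.items.borrow(), |v| v.last().expect("empty stack"))
    }

    // filter_map keeps the guard only if the closure finds a matching element.
    fn first_starting_with(&self, prefix: &str) -> Option<Ref<'_, String>> {
        Ref::filter_map(self.items.borrow(), |v| {
            v.iter().find(|s| s.starts_with(prefix))
        })
        .ok()
    }
}

fn main() {
    let s = Stack {
        items: RefCell::new(vec!["head".to_string(), "body".to_string()]),
    };
    assert_eq!(s.current().as_str(), "body");
    assert!(s.first_starting_with("he").is_some());
}
```
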
self.active_formatting_end_to_marker() + .iter() .filter(|&(_, _, tag)| tag.name == subject) .next() .map(|(i, h, t)| (i, h.clone(), t.clone())), @@ -701,12 +730,13 @@ where let fmt_elem_stack_index = unwrap_or_return!( self.open_elems + .borrow() .iter() .rposition(|n| self.sink.same_node(n, &fmt_elem)), { self.sink .parse_error(Borrowed("Formatting element not open")); - self.active_formatting.remove(fmt_elem_index); + self.active_formatting.borrow_mut().remove(fmt_elem_index); } ); @@ -718,7 +748,7 @@ where } // 8. - if !self.sink.same_node(self.current_node(), &fmt_elem) { + if !self.sink.same_node(&self.current_node(), &fmt_elem) { self.sink .parse_error(Borrowed("Formatting element not current node")); } @@ -726,6 +756,7 @@ where // 9. let (furthest_block_index, furthest_block) = unwrap_or_return!( self.open_elems + .borrow() .iter() .enumerate() .skip(fmt_elem_stack_index) @@ -734,13 +765,13 @@ where .map(|(i, h)| (i, h.clone())), // 10. { - self.open_elems.truncate(fmt_elem_stack_index); - self.active_formatting.remove(fmt_elem_index); + self.open_elems.borrow_mut().truncate(fmt_elem_stack_index); + self.active_formatting.borrow_mut().remove(fmt_elem_index); } ); // 11. - let common_ancestor = self.open_elems[fmt_elem_stack_index - 1].clone(); + let common_ancestor = self.open_elems.borrow()[fmt_elem_stack_index - 1].clone(); // 12. let mut bookmark = Bookmark::Replace(fmt_elem.clone()); @@ -758,7 +789,7 @@ where // 13.3. node_index -= 1; - node = self.open_elems[node_index].clone(); + node = self.open_elems.borrow()[node_index].clone(); // 13.4. if self.sink.same_node(&node, &fmt_elem) { @@ -768,8 +799,8 @@ where // 13.5. if inner_counter > 3 { self.position_in_active_formatting(&node) - .map(|position| self.active_formatting.remove(position)); - self.open_elems.remove(node_index); + .map(|position| self.active_formatting.borrow_mut().remove(position)); + self.open_elems.borrow_mut().remove(node_index); continue; } @@ -777,13 +808,13 @@ where self.position_in_active_formatting(&node), // 13.6. { - self.open_elems.remove(node_index); + self.open_elems.borrow_mut().remove(node_index); continue; } ); // 13.7. - let tag = match self.active_formatting[node_formatting_index] { + let tag = match self.active_formatting.borrow()[node_formatting_index] { Element(ref h, ref t) => { assert!(self.sink.same_node(h, &node)); t.clone() @@ -793,12 +824,13 @@ where // FIXME: Is there a way to avoid cloning the attributes twice here (once on their // own, once as part of t.clone() above)? let new_element = create_element( - &mut self.sink, + &self.sink, QualName::new(None, ns!(html), tag.name.clone()), tag.attrs.clone(), ); - self.open_elems[node_index] = new_element.clone(); - self.active_formatting[node_formatting_index] = Element(new_element.clone(), tag); + self.open_elems.borrow_mut()[node_index] = new_element.clone(); + self.active_formatting.borrow_mut()[node_formatting_index] = + Element(new_element.clone(), tag); node = new_element; // 13.8. @@ -824,7 +856,7 @@ where // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own, // once as part of t.clone() above)? 
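
The new `ActiveFormattingView` exists because a method cannot hand out an iterator that borrows from a temporary `Ref` guard; instead the guard itself is stored in a small view struct and the caller iterates from that. A simplified, self-contained sketch of that shape (placeholder element type, not the real active-formatting list):

```rust
use std::cell::{Ref, RefCell};

// Holding the Ref guard in a small view struct lets callers iterate the borrowed
// data without the guard being dropped at the end of the accessor.
struct View<'a> {
    data: Ref<'a, Vec<i32>>,
}

impl<'a> View<'a> {
    fn iter(&'a self) -> impl Iterator<Item = &'a i32> + 'a {
        self.data.iter()
    }
}

struct Holder {
    items: RefCell<Vec<i32>>,
}

impl Holder {
    fn view(&self) -> View<'_> {
        View {
            data: self.items.borrow(),
        }
    }
}

fn main() {
    let h = Holder {
        items: RefCell::new(vec![1, 2, 3]),
    };
    let v = h.view();
    assert_eq!(v.iter().sum::<i32>(), 6);
}
```
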
let new_element = create_element( - &mut self.sink, + &self.sink, QualName::new(None, ns!(html), fmt_elem_tag.name.clone()), fmt_elem_tag.attrs.clone(), ); @@ -845,18 +877,18 @@ where let index = self .position_in_active_formatting(&to_replace) .expect("bookmark not found in active formatting elements"); - self.active_formatting[index] = new_entry; + self.active_formatting.borrow_mut()[index] = new_entry; }, Bookmark::InsertAfter(previous) => { let index = self .position_in_active_formatting(&previous) .expect("bookmark not found in active formatting elements") + 1; - self.active_formatting.insert(index, new_entry); + self.active_formatting.borrow_mut().insert(index, new_entry); let old_index = self .position_in_active_formatting(&fmt_elem) .expect("formatting element not found in active formatting elements"); - self.active_formatting.remove(old_index); + self.active_formatting.borrow_mut().remove(old_index); }, } @@ -864,35 +896,41 @@ where self.remove_from_stack(&fmt_elem); let new_furthest_block_index = self .open_elems + .borrow() .iter() .position(|n| self.sink.same_node(n, &furthest_block)) .expect("furthest block missing from open element stack"); self.open_elems + .borrow_mut() .insert(new_furthest_block_index + 1, new_element); // 20. } } - fn push(&mut self, elem: &Handle) { - self.open_elems.push(elem.clone()); + fn push(&self, elem: &Handle) { + self.open_elems.borrow_mut().push(elem.clone()); } - fn pop(&mut self) -> Handle { - let elem = self.open_elems.pop().expect("no current element"); + fn pop(&self) -> Handle { + let elem = self + .open_elems + .borrow_mut() + .pop() + .expect("no current element"); self.sink.pop(&elem); elem } - fn remove_from_stack(&mut self, elem: &Handle) { - let sink = &mut self.sink; + fn remove_from_stack(&self, elem: &Handle) { let position = self .open_elems + .borrow() .iter() - .rposition(|x| sink.same_node(elem, &x)); + .rposition(|x| self.sink.same_node(elem, &x)); if let Some(position) = position { - self.open_elems.remove(position); - sink.pop(elem); + self.open_elems.borrow_mut().remove(position); + self.sink.pop(elem); } } @@ -901,6 +939,7 @@ where Marker => true, Element(ref node, _) => self .open_elems + .borrow() .iter() .rev() .any(|n| self.sink.same_node(&n, &node)), @@ -908,28 +947,29 @@ where } /// Reconstruct the active formatting elements. - fn reconstruct_formatting(&mut self) { + fn reconstruct_formatting(&self) { { - let last = unwrap_or_return!(self.active_formatting.last(), ()); + let active_formatting = self.active_formatting.borrow(); + let last = unwrap_or_return!(active_formatting.last(), ()); if self.is_marker_or_open(last) { return; } } - let mut entry_index = self.active_formatting.len() - 1; + let mut entry_index = self.active_formatting.borrow().len() - 1; loop { if entry_index == 0 { break; } entry_index -= 1; - if self.is_marker_or_open(&self.active_formatting[entry_index]) { + if self.is_marker_or_open(&self.active_formatting.borrow()[entry_index]) { entry_index += 1; break; } } loop { - let tag = match self.active_formatting[entry_index] { + let tag = match self.active_formatting.borrow()[entry_index] { Element(_, ref t) => t.clone(), Marker => panic!("Found marker during formatting element reconstruction"), }; @@ -938,8 +978,8 @@ where // once as part of t.clone() above)? 
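
The inner `{ ... }` blocks that confine `borrow()` calls (as in `reconstruct_formatting` above) matter because `RefCell` enforces borrowing at runtime: a shared borrow still alive across a later `borrow_mut()` panics. A minimal sketch of that scoping pattern, with placeholder types:

```rust
use std::cell::RefCell;

struct Builder {
    open_elems: RefCell<Vec<String>>,
}

impl Builder {
    fn pop(&self) {
        self.open_elems.borrow_mut().pop();
    }

    fn pop_if_named(&self, name: &str) {
        // The shared borrow must end before pop() takes a mutable borrow;
        // otherwise RefCell panics at runtime ("already borrowed").
        let should_pop = {
            let elems = self.open_elems.borrow();
            elems.last().map(|e| e.as_str() == name).unwrap_or(false)
        };
        if should_pop {
            self.pop();
        }
    }
}

fn main() {
    let b = Builder {
        open_elems: RefCell::new(vec!["p".to_string()]),
    };
    b.pop_if_named("p");
    assert!(b.open_elems.borrow().is_empty());
}
```
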
let new_element = self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone()); - self.active_formatting[entry_index] = Element(new_element, tag); - if entry_index == self.active_formatting.len() - 1 { + self.active_formatting.borrow_mut()[entry_index] = Element(new_element, tag); + if entry_index == self.active_formatting.borrow().len() - 1 { break; } entry_index += 1; @@ -947,18 +987,18 @@ where } /// Get the first element on the stack, which will be the element. - fn html_elem(&self) -> &Handle { - &self.open_elems[0] + fn html_elem(&self) -> Ref { + Ref::map(self.open_elems.borrow(), |elems| &elems[0]) } /// Get the second element on the stack, if it's a HTML body element. - fn body_elem(&self) -> Option<&Handle> { - if self.open_elems.len() <= 1 { + fn body_elem(&self) -> Option> { + if self.open_elems.borrow().len() <= 1 { return None; } - let node = &self.open_elems[1]; - if self.html_elem_named(node, local_name!("body")) { + let node = Ref::map(self.open_elems.borrow(), |elems| &elems[1]); + if self.html_elem_named(&node, local_name!("body")) { Some(node) } else { None @@ -967,12 +1007,12 @@ where /// Signal an error depending on the state of the stack of open elements at /// the end of the body. - fn check_body_end(&mut self) { + fn check_body_end(&self) { declare_tag_set!(body_end_ok = "dd" "dt" "li" "optgroup" "option" "p" "rp" "rt" "tbody" "td" "tfoot" "th" "thead" "tr" "body" "html"); - for elem in self.open_elems.iter() { + for elem in self.open_elems.borrow().iter() { let error; { let name = self.sink.elem_name(elem); @@ -998,7 +1038,7 @@ where TagSet: Fn(ExpandedName) -> bool, Pred: Fn(Handle) -> bool, { - for node in self.open_elems.iter().rev() { + for node in self.open_elems.borrow().iter().rev() { if pred(node.clone()) { return true; } @@ -1026,12 +1066,13 @@ where fn in_html_elem_named(&self, name: LocalName) -> bool { self.open_elems + .borrow() .iter() .any(|elem| self.html_elem_named(elem, name.clone())) } fn current_node_named(&self, name: LocalName) -> bool { - self.html_elem_named(self.current_node(), name) + self.html_elem_named(&self.current_node(), name) } fn in_scope_named(&self, scope: TagSet, name: LocalName) -> bool @@ -1042,13 +1083,14 @@ where } //§ closing-elements-that-have-implied-end-tags - fn generate_implied_end(&mut self, set: TagSet) + fn generate_implied_end(&self, set: TagSet) where TagSet: Fn(ExpandedName) -> bool, { loop { { - let elem = unwrap_or_return!(self.open_elems.last(), ()); + let open_elems = self.open_elems.borrow(); + let elem = unwrap_or_return!(open_elems.last(), ()); let nsname = self.sink.elem_name(elem); if !set(nsname) { return; @@ -1058,7 +1100,7 @@ where } } - fn generate_implied_end_except(&mut self, except: LocalName) { + fn generate_implied_end_except(&self, except: LocalName) { self.generate_implied_end(|p| { if *p.ns == ns!(html) && *p.local == except { false @@ -1070,7 +1112,7 @@ where //§ END // Pop elements until the current element is in the set. - fn pop_until_current(&mut self, pred: TagSet) + fn pop_until_current(&self, pred: TagSet) where TagSet: Fn(ExpandedName) -> bool, { @@ -1078,20 +1120,20 @@ where if self.current_node_in(|x| pred(x)) { break; } - self.open_elems.pop(); + self.open_elems.borrow_mut().pop(); } } // Pop elements until an element from the set has been popped. Returns the // number of elements popped. - fn pop_until

<P>(&mut self, pred: P) -> usize + fn pop_until<P>
(&self, pred: P) -> usize where P: Fn(ExpandedName) -> bool, { let mut n = 0; loop { n += 1; - match self.open_elems.pop() { + match self.open_elems.borrow_mut().pop() { None => break, Some(elem) => { if pred(self.sink.elem_name(&elem)) { @@ -1103,13 +1145,13 @@ where n } - fn pop_until_named(&mut self, name: LocalName) -> usize { + fn pop_until_named(&self, name: LocalName) -> usize { self.pop_until(|p| *p.ns == ns!(html) && *p.local == name) } // Pop elements until one with the specified name has been popped. // Signal an error if it was not the first one. - fn expect_to_close(&mut self, name: LocalName) { + fn expect_to_close(&self, name: LocalName) { if self.pop_until_named(name.clone()) != 1 { self.sink.parse_error(format_if!( self.opts.exact_errors, @@ -1120,13 +1162,13 @@ where } } - fn close_p_element(&mut self) { + fn close_p_element(&self) { declare_tag_set!(implied = [cursory_implied_end] - "p"); self.generate_implied_end(implied); self.expect_to_close(local_name!("p")); } - fn close_p_element_in_button_scope(&mut self) { + fn close_p_element_in_button_scope(&self) { if self.in_scope_named(button_scope, local_name!("p")) { self.close_p_element(); } @@ -1144,20 +1186,20 @@ where } } - fn foster_parent_in_body(&mut self, token: Token) -> ProcessResult { + fn foster_parent_in_body(&self, token: Token) -> ProcessResult { warn!("foster parenting not implemented"); - self.foster_parenting = true; + self.foster_parenting.set(true); let res = self.step(InBody, token); // FIXME: what if res is Reprocess? - self.foster_parenting = false; + self.foster_parenting.set(false); res } - fn process_chars_in_table(&mut self, token: Token) -> ProcessResult { + fn process_chars_in_table(&self, token: Token) -> ProcessResult { declare_tag_set!(table_outer = "table" "tbody" "tfoot" "thead" "tr"); if self.current_node_in(table_outer) { - assert!(self.pending_table_text.is_empty()); - self.orig_mode = Some(self.mode); + assert!(self.pending_table_text.borrow().is_empty()); + self.orig_mode.set(Some(self.mode.get())); Reprocess(InTableText, token) } else { self.sink.parse_error(format_if!( @@ -1171,10 +1213,12 @@ where } // https://html.spec.whatwg.org/multipage/#reset-the-insertion-mode-appropriately - fn reset_insertion_mode(&mut self) -> InsertionMode { - for (i, mut node) in self.open_elems.iter().enumerate().rev() { + fn reset_insertion_mode(&self) -> InsertionMode { + let open_elems = self.open_elems.borrow(); + for (i, mut node) in open_elems.iter().enumerate().rev() { let last = i == 0usize; - if let (true, Some(ctx)) = (last, self.context_elem.as_ref()) { + let context_elem = self.context_elem.borrow(); + if let (true, Some(ctx)) = (last, context_elem.as_ref()) { node = ctx; } let name = match self.sink.elem_name(node) { @@ -1186,7 +1230,7 @@ where }; match *name { local_name!("select") => { - for ancestor in self.open_elems[0..i].iter().rev() { + for ancestor in self.open_elems.borrow()[0..i].iter().rev() { if self.html_elem_named(ancestor, local_name!("template")) { return InSelect; } else if self.html_elem_named(ancestor, local_name!("table")) { @@ -1207,7 +1251,7 @@ where local_name!("caption") => return InCaption, local_name!("colgroup") => return InColumnGroup, local_name!("table") => return InTable, - local_name!("template") => return *self.template_modes.last().unwrap(), + local_name!("template") => return *self.template_modes.borrow().last().unwrap(), local_name!("head") => { if !last { return InHead; @@ -1215,7 +1259,7 @@ where }, local_name!("body") => return InBody, 
local_name!("frameset") => return InFrameset, - local_name!("html") => match self.head_elem { + local_name!("html") => match *self.head_elem.borrow() { None => return BeforeHead, Some(_) => return AfterHead, }, @@ -1226,7 +1270,7 @@ where InBody } - fn close_the_cell(&mut self) { + fn close_the_cell(&self) { self.generate_implied_end(cursory_implied_end); if self.pop_until(td_th) != 1 { self.sink @@ -1235,34 +1279,35 @@ where self.clear_active_formatting_to_marker(); } - fn append_text(&mut self, text: StrTendril) -> ProcessResult { + fn append_text(&self, text: StrTendril) -> ProcessResult { self.insert_appropriately(AppendText(text), None); Done } - fn append_comment(&mut self, text: StrTendril) -> ProcessResult { + fn append_comment(&self, text: StrTendril) -> ProcessResult { let comment = self.sink.create_comment(text); self.insert_appropriately(AppendNode(comment), None); Done } - fn append_comment_to_doc(&mut self, text: StrTendril) -> ProcessResult { + fn append_comment_to_doc(&self, text: StrTendril) -> ProcessResult { let comment = self.sink.create_comment(text); self.sink.append(&self.doc_handle, AppendNode(comment)); Done } - fn append_comment_to_html(&mut self, text: StrTendril) -> ProcessResult { - let target = html_elem(&self.open_elems); + fn append_comment_to_html(&self, text: StrTendril) -> ProcessResult { + let open_elems = self.open_elems.borrow(); + let target = html_elem(&*open_elems); let comment = self.sink.create_comment(text); self.sink.append(target, AppendNode(comment)); Done } //§ creating-and-inserting-nodes - fn create_root(&mut self, attrs: Vec) { + fn create_root(&self, attrs: Vec) { let elem = create_element( - &mut self.sink, + &self.sink, QualName::new(None, ns!(html), local_name!("html")), attrs, ); @@ -1273,7 +1318,7 @@ where // https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token fn insert_element( - &mut self, + &self, push: PushFlag, ns: Namespace, name: LocalName, @@ -1287,7 +1332,7 @@ where // Step 7. let qname = QualName::new(None, ns, name); - let elem = create_element(&mut self.sink, qname.clone(), attrs.clone()); + let elem = create_element(&self.sink, qname.clone(), attrs.clone()); let insertion_point = self.appropriate_place_for_insertion(None); let (node1, node2) = match insertion_point { @@ -1300,14 +1345,14 @@ where // Step 12. if form_associatable(qname.expanded()) - && self.form_elem.is_some() + && self.form_elem.borrow().is_some() && !self.in_html_elem_named(local_name!("template")) && !(listed(qname.expanded()) && attrs .iter() .any(|a| a.name.expanded() == expanded_name!("", "form"))) { - let form = self.form_elem.as_ref().unwrap().clone(); + let form = self.form_elem.borrow().as_ref().unwrap().clone(); let node2 = match node2 { Some(ref n) => Some(n), None => None, @@ -1325,24 +1370,24 @@ where elem } - fn insert_element_for(&mut self, tag: Tag) -> Handle { + fn insert_element_for(&self, tag: Tag) -> Handle { self.insert_element(Push, ns!(html), tag.name, tag.attrs) } - fn insert_and_pop_element_for(&mut self, tag: Tag) -> Handle { + fn insert_and_pop_element_for(&self, tag: Tag) -> Handle { self.insert_element(NoPush, ns!(html), tag.name, tag.attrs) } - fn insert_phantom(&mut self, name: LocalName) -> Handle { + fn insert_phantom(&self, name: LocalName) -> Handle { self.insert_element(Push, ns!(html), name, vec![]) } //§ END - fn create_formatting_element_for(&mut self, tag: Tag) -> Handle { + fn create_formatting_element_for(&self, tag: Tag) -> Handle { // FIXME: This really wants unit tests. 
let mut first_match = None; let mut matches = 0usize; - for (i, _, old_tag) in self.active_formatting_end_to_marker() { + for (i, _, old_tag) in self.active_formatting_end_to_marker().iter() { if tag.equiv_modulo_attr_order(old_tag) { first_match = Some(i); matches += 1; @@ -1351,27 +1396,30 @@ where if matches >= 3 { self.active_formatting + .borrow_mut() .remove(first_match.expect("matches with no index")); } let elem = self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone()); - self.active_formatting.push(Element(elem.clone(), tag)); + self.active_formatting + .borrow_mut() + .push(Element(elem.clone(), tag)); elem } - fn clear_active_formatting_to_marker(&mut self) { + fn clear_active_formatting_to_marker(&self) { loop { - match self.active_formatting.pop() { + match self.active_formatting.borrow_mut().pop() { None | Some(Marker) => break, _ => (), } } } - fn process_end_tag_in_body(&mut self, tag: Tag) { + fn process_end_tag_in_body(&self, tag: Tag) { // Look back for a matching open element. let mut match_idx = None; - for (i, elem) in self.open_elems.iter().enumerate().rev() { + for (i, elem) in self.open_elems.borrow().iter().enumerate().rev() { if self.html_elem_named(elem, tag.name.clone()) { match_idx = Some(i); break; @@ -1397,16 +1445,17 @@ where self.generate_implied_end_except(tag.name.clone()); - if match_idx != self.open_elems.len() - 1 { + if match_idx != self.open_elems.borrow().len() - 1 { // mis-nested tags self.unexpected(&tag); } - self.open_elems.truncate(match_idx); + self.open_elems.borrow_mut().truncate(match_idx); } - fn handle_misnested_a_tags(&mut self, tag: &Tag) { + fn handle_misnested_a_tags(&self, tag: &Tag) { let node = unwrap_or_return!( self.active_formatting_end_to_marker() + .iter() .filter(|&(_, n, _)| self.html_elem_named(n, local_name!("a"))) .next() .map(|(_, n, _)| n.clone()), @@ -1416,21 +1465,22 @@ where self.unexpected(tag); self.adoption_agency(local_name!("a")); self.position_in_active_formatting(&node) - .map(|index| self.active_formatting.remove(index)); + .map(|index| self.active_formatting.borrow_mut().remove(index)); self.remove_from_stack(&node); } //§ tree-construction - fn is_foreign(&mut self, token: &Token) -> bool { + fn is_foreign(&self, token: &Token) -> bool { if let EOFToken = *token { return false; } - if self.open_elems.is_empty() { + if self.open_elems.borrow().is_empty() { return false; } - let name = self.sink.elem_name(self.adjusted_current_node()); + let current = self.adjusted_current_node(); + let name = self.sink.elem_name(¤t); if let ns!(html) = *name.ns { return false; } @@ -1467,7 +1517,7 @@ where CharacterTokens(..) | NullCharacterToken | TagToken(Tag { kind: StartTag, .. }) => { return !self .sink - .is_mathml_annotation_xml_integration_point(self.adjusted_current_node()); + .is_mathml_annotation_xml_integration_point(&self.adjusted_current_node()); }, _ => {}, }; @@ -1477,7 +1527,7 @@ where } //§ END - fn enter_foreign(&mut self, mut tag: Tag, ns: Namespace) -> ProcessResult { + fn enter_foreign(&self, mut tag: Tag, ns: Namespace) -> ProcessResult { match ns { ns!(mathml) => self.adjust_mathml_attributes(&mut tag), ns!(svg) => self.adjust_svg_attributes(&mut tag), @@ -1494,7 +1544,7 @@ where } } - fn adjust_svg_tag_name(&mut self, tag: &mut Tag) { + fn adjust_svg_tag_name(&self, tag: &mut Tag) { let Tag { ref mut name, .. 
} = *tag; match *name { local_name!("altglyph") => *name = local_name!("altGlyph"), @@ -1538,7 +1588,7 @@ where } } - fn adjust_attributes(&mut self, tag: &mut Tag, mut map: F) + fn adjust_attributes(&self, tag: &mut Tag, mut map: F) where F: FnMut(LocalName) -> Option, { @@ -1549,7 +1599,7 @@ where } } - fn adjust_svg_attributes(&mut self, tag: &mut Tag) { + fn adjust_svg_attributes(&self, tag: &mut Tag) { self.adjust_attributes(tag, |k| match k { local_name!("attributename") => Some(qualname!("", "attributeName")), local_name!("attributetype") => Some(qualname!("", "attributeType")), @@ -1613,14 +1663,14 @@ where }); } - fn adjust_mathml_attributes(&mut self, tag: &mut Tag) { + fn adjust_mathml_attributes(&self, tag: &mut Tag) { self.adjust_attributes(tag, |k| match k { local_name!("definitionurl") => Some(qualname!("", "definitionURL")), _ => None, }); } - fn adjust_foreign_attributes(&mut self, tag: &mut Tag) { + fn adjust_foreign_attributes(&self, tag: &mut Tag) { self.adjust_attributes(tag, |k| match k { local_name!("xlink:actuate") => Some(qualname!("xlink" xlink "actuate")), local_name!("xlink:arcrole") => Some(qualname!("xlink" xlink "arcrole")), @@ -1637,8 +1687,12 @@ where }); } - fn foreign_start_tag(&mut self, mut tag: Tag) -> ProcessResult { - let current_ns = self.sink.elem_name(self.adjusted_current_node()).ns.clone(); + fn foreign_start_tag(&self, mut tag: Tag) -> ProcessResult { + let current_ns = self + .sink + .elem_name(&self.adjusted_current_node()) + .ns + .clone(); match current_ns { ns!(mathml) => self.adjust_mathml_attributes(&mut tag), ns!(svg) => { @@ -1658,13 +1712,13 @@ where } } - fn unexpected_start_tag_in_foreign_content(&mut self, tag: Tag) -> ProcessResult { + fn unexpected_start_tag_in_foreign_content(&self, tag: Tag) -> ProcessResult { self.unexpected(&tag); while !self.current_node_in(|n| { *n.ns == ns!(html) || mathml_text_integration_point(n) || svg_html_integration_point(n) }) { self.pop(); } - self.step(self.mode, TagToken(tag)) + self.step(self.mode.get(), TagToken(tag)) } } diff --git a/html5ever/src/tree_builder/rules.rs b/html5ever/src/tree_builder/rules.rs index 5e94bd57..947d6f60 100644 --- a/html5ever/src/tree_builder/rules.rs +++ b/html5ever/src/tree_builder/rules.rs @@ -33,7 +33,7 @@ where Handle: Clone, Sink: TreeSink, { - fn step(&mut self, mode: InsertionMode, token: Token) -> ProcessResult { + fn step(&self, mode: InsertionMode, token: Token) -> ProcessResult { self.debug_step(mode, &token); match mode { @@ -59,7 +59,7 @@ where tag @ => { self.create_root(tag.attrs); - self.mode = BeforeHead; + self.mode.set(BeforeHead); Done } @@ -82,8 +82,8 @@ where => self.step(InBody, token), tag @ => { - self.head_elem = Some(self.insert_element_for(tag)); - self.mode = InHead; + *self.head_elem.borrow_mut() = Some(self.insert_element_for(tag)); + self.mode.set(InHead); Done } @@ -92,7 +92,7 @@ where tag @ => self.unexpected(&tag), token => { - self.head_elem = Some(self.insert_phantom(local_name!("head"))); + *self.head_elem.borrow_mut() = Some(self.insert_phantom(local_name!("head"))); Reprocess(InHead, token) } }), @@ -118,7 +118,7 @@ where tag @ <style> <noscript> => { if (!self.opts.scripting_enabled) && (tag.name == local_name!("noscript")) { self.insert_element_for(tag); - self.mode = InHeadNoscript; + self.mode.set(InHeadNoscript); Done } else { self.parse_raw_data(tag, Rawtext) @@ -127,19 +127,19 @@ where tag @ <script> => { let elem = create_element( - &mut self.sink, QualName::new(None, ns!(html), local_name!("script")), + &self.sink, 
QualName::new(None, ns!(html), local_name!("script")), tag.attrs); if self.is_fragment() { self.sink.mark_script_already_started(&elem); } self.insert_appropriately(AppendNode(elem.clone()), None); - self.open_elems.push(elem); + self.open_elems.borrow_mut().push(elem); self.to_raw_text_mode(ScriptData) } </head> => { self.pop(); - self.mode = AfterHead; + self.mode.set(AfterHead); Done } @@ -147,10 +147,10 @@ where tag @ <template> => { self.insert_element_for(tag); - self.active_formatting.push(Marker); - self.frameset_ok = false; - self.mode = InTemplate; - self.template_modes.push(InTemplate); + self.active_formatting.borrow_mut().push(Marker); + self.frameset_ok.set(false); + self.mode.set(InTemplate); + self.template_modes.borrow_mut().push(InTemplate); Done } @@ -161,8 +161,8 @@ where self.generate_implied_end(thorough_implied_end); self.expect_to_close(local_name!("template")); self.clear_active_formatting_to_marker(); - self.template_modes.pop(); - self.mode = self.reset_insertion_mode(); + self.template_modes.borrow_mut().pop(); + self.mode.set(self.reset_insertion_mode()); } Done } @@ -182,7 +182,7 @@ where </noscript> => { self.pop(); - self.mode = InHead; + self.mode.set(InHead); Done }, @@ -216,21 +216,21 @@ where tag @ <body> => { self.insert_element_for(tag); - self.frameset_ok = false; - self.mode = InBody; + self.frameset_ok.set(false); + self.mode.set(InBody); Done } tag @ <frameset> => { self.insert_element_for(tag); - self.mode = InFrameset; + self.mode.set(InFrameset); Done } <base> <basefont> <bgsound> <link> <meta> <noframes> <script> <style> <template> <title> => { self.unexpected(&token); - let head = self.head_elem.as_ref().expect("no head element").clone(); + let head = self.head_elem.borrow().as_ref().expect("no head element").clone(); self.push(&head); let result = self.step(InHead, token); self.remove_from_stack(&head); @@ -257,7 +257,7 @@ where CharacterTokens(_, text) => { self.reconstruct_formatting(); if any_not_whitespace(&text) { - self.frameset_ok = false; + self.frameset_ok.set(false); } self.append_text(text) } @@ -267,7 +267,8 @@ where tag @ <html> => { self.unexpected(&tag); if !self.in_html_elem_named(local_name!("template")) { - let top = html_elem(&self.open_elems); + let open_elems = self.open_elems.borrow(); + let top = html_elem(&open_elems); self.sink.add_attrs_if_missing(top, tag.attrs); } Done @@ -280,10 +281,11 @@ where tag @ <body> => { self.unexpected(&tag); - match self.body_elem().cloned() { - Some(ref node) if self.open_elems.len() != 1 && + let body_elem = self.body_elem().as_deref().cloned(); + match body_elem { + Some(ref node) if self.open_elems.borrow().len() != 1 && !self.in_html_elem_named(local_name!("template")) => { - self.frameset_ok = false; + self.frameset_ok.set(false); self.sink.add_attrs_if_missing(node, tag.attrs) }, _ => {} @@ -293,21 +295,21 @@ where tag @ <frameset> => { self.unexpected(&tag); - if !self.frameset_ok { return Done; } + if !self.frameset_ok.get() { return Done; } let body = unwrap_or_return!(self.body_elem(), Done).clone(); self.sink.remove_from_parent(&body); // FIXME: can we get here in the fragment case? // What to do with the first element then? 
- self.open_elems.truncate(1); + self.open_elems.borrow_mut().truncate(1); self.insert_element_for(tag); - self.mode = InFrameset; + self.mode.set(InFrameset); Done } EOFToken => { - if !self.template_modes.is_empty() { + if !self.template_modes.borrow().is_empty() { self.step(InTemplate, token) } else { self.check_body_end(); @@ -318,7 +320,7 @@ where </body> => { if self.in_scope_named(default_scope, local_name!("body")) { self.check_body_end(); - self.mode = AfterBody; + self.mode.set(AfterBody); } else { self.sink.parse_error(Borrowed("</body> with no <body> in scope")); } @@ -362,20 +364,20 @@ where tag @ <pre> <listing> => { self.close_p_element_in_button_scope(); self.insert_element_for(tag); - self.ignore_lf = true; - self.frameset_ok = false; + self.ignore_lf.set(true); + self.frameset_ok.set(false); Done } tag @ <form> => { - if self.form_elem.is_some() && + if self.form_elem.borrow().is_some() && !self.in_html_elem_named(local_name!("template")) { self.sink.parse_error(Borrowed("nested forms")); } else { self.close_p_element_in_button_scope(); let elem = self.insert_element_for(tag); if !self.in_html_elem_named(local_name!("template")) { - self.form_elem = Some(elem); + *self.form_elem.borrow_mut() = Some(elem); } } Done @@ -391,10 +393,10 @@ where _ => unreachable!(), }; - self.frameset_ok = false; + self.frameset_ok.set(false); let mut to_close = None; - for node in self.open_elems.iter().rev() { + for node in self.open_elems.borrow().iter().rev() { let name = self.sink.elem_name(node); let can_close = if list { close_list(name) @@ -437,7 +439,7 @@ where } self.reconstruct_formatting(); self.insert_element_for(tag); - self.frameset_ok = false; + self.frameset_ok.set(false); Done } @@ -558,8 +560,8 @@ where tag @ <applet> <marquee> <object> => { self.reconstruct_formatting(); self.insert_element_for(tag); - self.active_formatting.push(Marker); - self.frameset_ok = false; + self.active_formatting.borrow_mut().push(Marker); + self.frameset_ok.set(false); Done } @@ -575,12 +577,12 @@ where } tag @ <table> => { - if self.quirks_mode != Quirks { + if self.quirks_mode.get() != Quirks { self.close_p_element_in_button_scope(); } self.insert_element_for(tag); - self.frameset_ok = false; - self.mode = InTable; + self.frameset_ok.set(false); + self.mode.set(InTable); Done } @@ -601,7 +603,7 @@ where self.reconstruct_formatting(); self.insert_and_pop_element_for(tag); if !keep_frameset_ok { - self.frameset_ok = false; + self.frameset_ok.set(false); } DoneAckSelfClosing } @@ -614,7 +616,7 @@ where tag @ <hr> => { self.close_p_element_in_button_scope(); self.insert_and_pop_element_for(tag); - self.frameset_ok = false; + self.frameset_ok.set(false); DoneAckSelfClosing } @@ -627,20 +629,20 @@ where } tag @ <textarea> => { - self.ignore_lf = true; - self.frameset_ok = false; + self.ignore_lf.set(true); + self.frameset_ok.set(false); self.parse_raw_data(tag, Rcdata) } tag @ <xmp> => { self.close_p_element_in_button_scope(); self.reconstruct_formatting(); - self.frameset_ok = false; + self.frameset_ok.set(false); self.parse_raw_data(tag, Rawtext) } tag @ <iframe> => { - self.frameset_ok = false; + self.frameset_ok.set(false); self.parse_raw_data(tag, Rawtext) } @@ -653,14 +655,14 @@ where tag @ <select> => { self.reconstruct_formatting(); self.insert_element_for(tag); - self.frameset_ok = false; + self.frameset_ok.set(false); // NB: mode == InBody but possibly self.mode != mode, if // we're processing "as in the rules for InBody". 
- self.mode = match self.mode { + self.mode.set(match self.mode.get() { InTable | InCaption | InTableBody | InRow | InCell => InSelectInTable, _ => InSelect, - }; + }); Done } @@ -732,7 +734,8 @@ where EOFToken => { self.unexpected(&token); if self.current_node_named(local_name!("script")) { - let current = current_node(&self.open_elems); + let open_elems = self.open_elems.borrow(); + let current = current_node(&open_elems); self.sink.mark_script_already_started(current); } self.pop(); @@ -741,7 +744,7 @@ where tag @ </_> => { let node = self.pop(); - self.mode = self.orig_mode.take().unwrap(); + self.mode.set(self.orig_mode.take().unwrap()); if tag.name == local_name!("script") { return Script(node); } @@ -764,16 +767,16 @@ where tag @ <caption> => { self.pop_until_current(table_scope); - self.active_formatting.push(Marker); + self.active_formatting.borrow_mut().push(Marker); self.insert_element_for(tag); - self.mode = InCaption; + self.mode.set(InCaption); Done } tag @ <colgroup> => { self.pop_until_current(table_scope); self.insert_element_for(tag); - self.mode = InColumnGroup; + self.mode.set(InColumnGroup); Done } @@ -786,7 +789,7 @@ where tag @ <tbody> <tfoot> <thead> => { self.pop_until_current(table_scope); self.insert_element_for(tag); - self.mode = InTableBody; + self.mode.set(InTableBody); Done } @@ -809,7 +812,7 @@ where </table> => { if self.in_scope_named(table_scope, local_name!("table")) { self.pop_until_named(local_name!("table")); - self.mode = self.reset_insertion_mode(); + self.mode.set(self.reset_insertion_mode()); } else { self.unexpected(&token); } @@ -835,8 +838,8 @@ where tag @ <form> => { self.unexpected(&tag); - if !self.in_html_elem_named(local_name!("template")) && self.form_elem.is_none() { - self.form_elem = Some(self.insert_and_pop_element_for(tag)); + if !self.in_html_elem_named(local_name!("template")) && self.form_elem.borrow().is_none() { + *self.form_elem.borrow_mut() = Some(self.insert_and_pop_element_for(tag)); } Done } @@ -854,12 +857,12 @@ where NullCharacterToken => self.unexpected(&token), CharacterTokens(split, text) => { - self.pending_table_text.push((split, text)); + self.pending_table_text.borrow_mut().push((split, text)); Done } token => { - let pending = ::std::mem::take(&mut self.pending_table_text); + let pending = self.pending_table_text.take(); let contains_nonspace = pending.iter().any(|&(split, ref text)| { match split { Whitespace => false, @@ -896,7 +899,7 @@ where self.clear_active_formatting_to_marker(); match tag { Tag { kind: EndTag, name: local_name!("caption"), .. 
} => { - self.mode = InTable; + self.mode.set(InTable); Done } _ => Reprocess(InTable, TagToken(tag)) @@ -929,7 +932,7 @@ where </colgroup> => { if self.current_node_named(local_name!("colgroup")) { self.pop(); - self.mode = InTable; + self.mode.set(InTable); } else { self.unexpected(&token); } @@ -957,7 +960,7 @@ where tag @ <tr> => { self.pop_until_current(table_body_context); self.insert_element_for(tag); - self.mode = InRow; + self.mode.set(InRow); Done } @@ -972,7 +975,7 @@ where if self.in_scope_named(table_scope, tag.name.clone()) { self.pop_until_current(table_body_context); self.pop(); - self.mode = InTable; + self.mode.set(InTable); } else { self.unexpected(&tag); } @@ -1001,8 +1004,8 @@ where tag @ <th> <td> => { self.pop_until_current(table_row_context); self.insert_element_for(tag); - self.mode = InCell; - self.active_formatting.push(Marker); + self.mode.set(InCell); + self.active_formatting.borrow_mut().push(Marker); Done } @@ -1011,7 +1014,7 @@ where self.pop_until_current(table_row_context); let node = self.pop(); self.assert_named(&node, local_name!("tr")); - self.mode = InTableBody; + self.mode.set(InTableBody); } else { self.unexpected(&token); } @@ -1057,7 +1060,7 @@ where self.generate_implied_end(cursory_implied_end); self.expect_to_close(tag.name); self.clear_active_formatting_to_marker(); - self.mode = InRow; + self.mode.set(InRow); } else { self.unexpected(&tag); } @@ -1128,9 +1131,9 @@ where } </optgroup> => { - if self.open_elems.len() >= 2 + if self.open_elems.borrow().len() >= 2 && self.current_node_named(local_name!("option")) - && self.html_elem_named(&self.open_elems[self.open_elems.len() - 2], + && self.html_elem_named(&self.open_elems.borrow()[self.open_elems.borrow().len() - 2], local_name!("optgroup")) { self.pop(); } @@ -1160,7 +1163,7 @@ where if in_scope { self.pop_until_named(local_name!("select")); - self.mode = self.reset_insertion_mode(); + self.mode.set(self.reset_insertion_mode()); } Done } @@ -1214,26 +1217,26 @@ where } <caption> <colgroup> <tbody> <tfoot> <thead> => { - self.template_modes.pop(); - self.template_modes.push(InTable); + self.template_modes.borrow_mut().pop(); + self.template_modes.borrow_mut().push(InTable); Reprocess(InTable, token) } <col> => { - self.template_modes.pop(); - self.template_modes.push(InColumnGroup); + self.template_modes.borrow_mut().pop(); + self.template_modes.borrow_mut().push(InColumnGroup); Reprocess(InColumnGroup, token) } <tr> => { - self.template_modes.pop(); - self.template_modes.push(InTableBody); + self.template_modes.borrow_mut().pop(); + self.template_modes.borrow_mut().push(InTableBody); Reprocess(InTableBody, token) } <td> <th> => { - self.template_modes.pop(); - self.template_modes.push(InRow); + self.template_modes.borrow_mut().pop(); + self.template_modes.borrow_mut().push(InRow); Reprocess(InRow, token) } @@ -1244,15 +1247,15 @@ where self.unexpected(&token); self.pop_until_named(local_name!("template")); self.clear_active_formatting_to_marker(); - self.template_modes.pop(); - self.mode = self.reset_insertion_mode(); + self.template_modes.borrow_mut().pop(); + self.mode.set(self.reset_insertion_mode()); Reprocess(self.reset_insertion_mode(), token) } } tag @ <_> => { - self.template_modes.pop(); - self.template_modes.push(InBody); + self.template_modes.borrow_mut().pop(); + self.template_modes.borrow_mut().push(InBody); Reprocess(InBody, TagToken(tag)) } @@ -1271,7 +1274,7 @@ where if self.is_fragment() { self.unexpected(&token); } else { - self.mode = AfterAfterBody; + 
self.mode.set(AfterAfterBody); } Done } @@ -1298,12 +1301,12 @@ where } </frameset> => { - if self.open_elems.len() == 1 { + if self.open_elems.borrow().len() == 1 { self.unexpected(&token); } else { self.pop(); if !self.is_fragment() && !self.current_node_named(local_name!("frameset")) { - self.mode = AfterFrameset; + self.mode.set(AfterFrameset); } } Done @@ -1317,7 +1320,7 @@ where <noframes> => self.step(InHead, token), EOFToken => { - if self.open_elems.len() != 1 { + if self.open_elems.borrow().len() != 1 { self.unexpected(&token); } self.stop_parsing() @@ -1335,7 +1338,7 @@ where <html> => self.step(InBody, token), </html> => { - self.mode = AfterAfterFrameset; + self.mode.set(AfterAfterFrameset); Done } @@ -1380,7 +1383,7 @@ where } } - fn step_foreign(&mut self, token: Token) -> ProcessResult<Handle> { + fn step_foreign(&self, token: Token) -> ProcessResult<Handle> { match_token!(token { NullCharacterToken => { self.unexpected(&token); @@ -1389,7 +1392,7 @@ where CharacterTokens(_, text) => { if any_not_whitespace(&text) { - self.frameset_ok = false; + self.frameset_ok.set(false); } self.append_text(text) } @@ -1422,7 +1425,7 @@ where tag @ </_> => { let mut first = true; - let mut stack_idx = self.open_elems.len() - 1; + let mut stack_idx = self.open_elems.borrow().len() - 1; loop { if stack_idx == 0 { return Done; @@ -1431,17 +1434,18 @@ where let html; let eq; { - let node_name = self.sink.elem_name(&self.open_elems[stack_idx]); + let open_elems = self.open_elems.borrow(); + let node_name = self.sink.elem_name(&open_elems[stack_idx]); html = *node_name.ns == ns!(html); eq = node_name.local.eq_ignore_ascii_case(&tag.name); } if !first && html { - let mode = self.mode; + let mode = self.mode.get(); return self.step(mode, TagToken(tag)); } if eq { - self.open_elems.truncate(stack_idx); + self.open_elems.borrow_mut().truncate(stack_idx); return Done; } diff --git a/markup5ever/interface/mod.rs b/markup5ever/interface/mod.rs index 7c4bea2e..0cac03db 100644 --- a/markup5ever/interface/mod.rs +++ b/markup5ever/interface/mod.rs @@ -19,18 +19,12 @@ use super::{LocalName, Namespace, Prefix}; /// An [expanded name], containing the tag and the namespace. /// /// [expanded name]: https://www.w3.org/TR/REC-xml-names/#dt-expname -#[derive(Copy, Clone, Eq, Hash)] +#[derive(Copy, Clone, Eq, Hash, PartialEq)] pub struct ExpandedName<'a> { pub ns: &'a Namespace, pub local: &'a LocalName, } -impl<'a, 'b> PartialEq<ExpandedName<'a>> for ExpandedName<'b> { - fn eq(&self, other: &ExpandedName<'a>) -> bool { - self.ns == other.ns && self.local == other.local - } -} - impl<'a> fmt::Debug for ExpandedName<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if self.ns.is_empty() { diff --git a/markup5ever/interface/tree_builder.rs b/markup5ever/interface/tree_builder.rs index 4010a160..a0d24265 100644 --- a/markup5ever/interface/tree_builder.rs +++ b/markup5ever/interface/tree_builder.rs @@ -78,7 +78,7 @@ pub struct ElementFlags { /// # Examples /// /// Create an element like `<div class="test-class-name"></div>`: -pub fn create_element<Sink>(sink: &mut Sink, name: QualName, attrs: Vec<Attribute>) -> Sink::Handle +pub fn create_element<Sink>(sink: &Sink, name: QualName, attrs: Vec<Attribute>) -> Sink::Handle where Sink: TreeSink, { @@ -121,10 +121,10 @@ pub trait TreeSink { fn finish(self) -> Self::Output; /// Signal a parse error. - fn parse_error(&mut self, msg: Cow<'static, str>); + fn parse_error(&self, msg: Cow<'static, str>); /// Get a handle to the `Document` node. 
- fn get_document(&mut self) -> Self::Handle; + fn get_document(&self) -> Self::Handle; /// What is the name of this element? /// @@ -142,30 +142,30 @@ pub trait TreeSink { /// /// [whatwg template]: https://html.spec.whatwg.org/multipage/#the-template-element fn create_element( - &mut self, + &self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags, ) -> Self::Handle; /// Create a comment node. - fn create_comment(&mut self, text: StrTendril) -> Self::Handle; + fn create_comment(&self, text: StrTendril) -> Self::Handle; /// Create a Processing Instruction node. - fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle; + fn create_pi(&self, target: StrTendril, data: StrTendril) -> Self::Handle; /// Append a node as the last child of the given node. If this would /// produce adjacent sibling text nodes, it should concatenate the text /// instead. /// /// The child node will not already have a parent. - fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>); + fn append(&self, parent: &Self::Handle, child: NodeOrText<Self::Handle>); /// When the insertion point is decided by the existence of a parent node of the /// element, we consider both possibilities and send the element which will be used /// if a parent node exists, along with the element to be used if there isn't one. fn append_based_on_parent_node( - &mut self, + &self, element: &Self::Handle, prev_element: &Self::Handle, child: NodeOrText<Self::Handle>, @@ -173,28 +173,28 @@ pub trait TreeSink { /// Append a `DOCTYPE` element to the `Document` node. fn append_doctype_to_document( - &mut self, + &self, name: StrTendril, public_id: StrTendril, system_id: StrTendril, ); /// Mark a HTML `<script>` as "already started". - fn mark_script_already_started(&mut self, _node: &Self::Handle) {} + fn mark_script_already_started(&self, _node: &Self::Handle) {} /// Indicate that a node was popped off the stack of open elements. - fn pop(&mut self, _node: &Self::Handle) {} + fn pop(&self, _node: &Self::Handle) {} /// Get a handle to a template's template contents. The tree builder /// promises this will never be called with something else than /// a template element. - fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle; + fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle; /// Do two handles refer to the same node? fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool; /// Set the document's quirks mode. - fn set_quirks_mode(&mut self, mode: QuirksMode); + fn set_quirks_mode(&self, mode: QuirksMode); /// Append a node as the sibling immediately before the given node. /// @@ -204,16 +204,16 @@ pub trait TreeSink { /// be merged, as in the behavior of `append`. /// /// NB: `new_node` may have an old parent, from which it should be removed. - fn append_before_sibling(&mut self, sibling: &Self::Handle, new_node: NodeOrText<Self::Handle>); + fn append_before_sibling(&self, sibling: &Self::Handle, new_node: NodeOrText<Self::Handle>); /// Add each attribute to the given element, if no attribute with that name /// already exists. The tree builder promises this will never be called /// with something else than an element. 
- fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<Attribute>); + fn add_attrs_if_missing(&self, target: &Self::Handle, attrs: Vec<Attribute>); /// Associate the given form-associatable element with the form element fn associate_with_form( - &mut self, + &self, _target: &Self::Handle, _form: &Self::Handle, _nodes: (&Self::Handle, Option<&Self::Handle>), @@ -221,10 +221,10 @@ pub trait TreeSink { } /// Detach the given node from its parent. - fn remove_from_parent(&mut self, target: &Self::Handle); + fn remove_from_parent(&self, target: &Self::Handle); /// Remove all the children from node and append them to new_parent. - fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle); + fn reparent_children(&self, node: &Self::Handle, new_parent: &Self::Handle); /// Returns true if the adjusted current node is an HTML integration point /// and the token is a start tag. @@ -233,10 +233,10 @@ pub trait TreeSink { } /// Called whenever the line number changes. - fn set_current_line(&mut self, _line_number: u64) {} + fn set_current_line(&self, _line_number: u64) {} /// Indicate that a `script` element is complete. - fn complete_script(&mut self, _node: &Self::Handle) -> NextParserState { + fn complete_script(&self, _node: &Self::Handle) -> NextParserState { NextParserState::Continue } } diff --git a/markup5ever/lib.rs b/markup5ever/lib.rs index a2c49753..a7ba2547 100644 --- a/markup5ever/lib.rs +++ b/markup5ever/lib.rs @@ -7,6 +7,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +#![allow(unexpected_cfgs)] + pub use tendril; /// Create a [`SmallCharSet`], with each space-separated number stored in the set. diff --git a/markup5ever/util/buffer_queue.rs b/markup5ever/util/buffer_queue.rs index 137d11eb..95a571e2 100644 --- a/markup5ever/util/buffer_queue.rs +++ b/markup5ever/util/buffer_queue.rs @@ -18,7 +18,7 @@ //! //! [`BufferQueue`]: struct.BufferQueue.html -use std::{cell::RefCell, collections::VecDeque}; +use std::{cell::RefCell, collections::VecDeque, mem}; use tendril::StrTendril; @@ -235,6 +235,17 @@ impl BufferQueue { result } + + pub fn replace_with(&self, other: BufferQueue) { + let _ = mem::replace(&mut *self.buffers.borrow_mut(), other.buffers.take()); + } + + pub fn swap_with(&self, other: &BufferQueue) { + mem::swap( + &mut *self.buffers.borrow_mut(), + &mut *other.buffers.borrow_mut(), + ); + } } #[cfg(test)] diff --git a/rcdom/examples/print-rcdom.rs b/rcdom/examples/print-rcdom.rs index 47ea9b1a..8a9bfb2c 100644 --- a/rcdom/examples/print-rcdom.rs +++ b/rcdom/examples/print-rcdom.rs @@ -69,9 +69,9 @@ fn main() { .unwrap(); walk(0, &dom.document); - if !dom.errors.is_empty() { + if !dom.errors.borrow().is_empty() { println!("\nParse errors:"); - for err in dom.errors.iter() { + for err in dom.errors.borrow().iter() { println!(" {}", err); } } diff --git a/rcdom/lib.rs b/rcdom/lib.rs index 0018c879..45553cc1 100644 --- a/rcdom/lib.rs +++ b/rcdom/lib.rs @@ -42,6 +42,7 @@ extern crate tendril; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::collections::{HashSet, VecDeque}; +use std::default::Default; use std::fmt; use std::io; use std::mem; @@ -210,10 +211,10 @@ pub struct RcDom { pub document: Handle, /// Errors that occurred during parsing. - pub errors: Vec<Cow<'static, str>>, + pub errors: RefCell<Vec<Cow<'static, str>>>, /// The document's quirks mode. 
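
The new `replace_with` and `swap_with` helpers in `buffer_queue.rs` are the usual `mem::replace`/`mem::swap`-on-`RefCell` idiom. A standalone sketch with a placeholder queue type (not the real `BufferQueue`):

```rust
use std::cell::RefCell;
use std::collections::VecDeque;
use std::mem;

struct Queue {
    buffers: RefCell<VecDeque<String>>,
}

impl Queue {
    // Replace our contents with `other`'s, dropping whatever we held.
    fn replace_with(&self, other: Queue) {
        let _ = mem::replace(&mut *self.buffers.borrow_mut(), other.buffers.take());
    }

    // Exchange contents; the two RefCells are distinct, so this cannot panic
    // unless the same queue is used as both receiver and argument.
    fn swap_with(&self, other: &Queue) {
        mem::swap(
            &mut *self.buffers.borrow_mut(),
            &mut *other.buffers.borrow_mut(),
        );
    }
}

fn main() {
    let a = Queue {
        buffers: RefCell::new(VecDeque::from(vec!["x".to_string()])),
    };
    let b = Queue {
        buffers: RefCell::new(VecDeque::new()),
    };
    a.swap_with(&b);
    assert!(a.buffers.borrow().is_empty());
    assert_eq!(b.buffers.borrow().len(), 1);
}
```
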
-    pub quirks_mode: QuirksMode,
+    pub quirks_mode: Cell<QuirksMode>,
 }

 impl TreeSink for RcDom {
@@ -224,15 +225,15 @@ impl TreeSink for RcDom {
     type Handle = Handle;

-    fn parse_error(&mut self, msg: Cow<'static, str>) {
-        self.errors.push(msg);
+    fn parse_error(&self, msg: Cow<'static, str>) {
+        self.errors.borrow_mut().push(msg);
     }

-    fn get_document(&mut self) -> Handle {
+    fn get_document(&self) -> Handle {
         self.document.clone()
     }

-    fn get_template_contents(&mut self, target: &Handle) -> Handle {
+    fn get_template_contents(&self, target: &Handle) -> Handle {
         if let NodeData::Element {
             ref template_contents,
             ..
@@ -248,8 +249,8 @@ impl TreeSink for RcDom {
         }
     }

-    fn set_quirks_mode(&mut self, mode: QuirksMode) {
-        self.quirks_mode = mode;
+    fn set_quirks_mode(&self, mode: QuirksMode) {
+        self.quirks_mode.set(mode);
     }

     fn same_node(&self, x: &Handle, y: &Handle) -> bool {
@@ -263,12 +264,7 @@ impl TreeSink for RcDom {
         };
     }

-    fn create_element(
-        &mut self,
-        name: QualName,
-        attrs: Vec<Attribute>,
-        flags: ElementFlags,
-    ) -> Handle {
+    fn create_element(&self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Handle {
         Node::new(NodeData::Element {
             name,
             attrs: RefCell::new(attrs),
@@ -281,18 +277,18 @@ impl TreeSink for RcDom {
         })
     }

-    fn create_comment(&mut self, text: StrTendril) -> Handle {
+    fn create_comment(&self, text: StrTendril) -> Handle {
         Node::new(NodeData::Comment { contents: text })
     }

-    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Handle {
+    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Handle {
         Node::new(NodeData::ProcessingInstruction {
             target,
             contents: data,
         })
     }

-    fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
+    fn append(&self, parent: &Handle, child: NodeOrText<Handle>) {
         // Append to an existing Text node if we have one.
         if let NodeOrText::AppendText(text) = &child {
             if let Some(h) = parent.children.borrow().last() {
@@ -313,7 +309,7 @@ impl TreeSink for RcDom {
         );
     }

-    fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) {
+    fn append_before_sibling(&self, sibling: &Handle, child: NodeOrText<Handle>) {
         let (parent, i) = get_parent_and_index(sibling)
             .expect("append_before_sibling called on node without parent");

@@ -349,7 +345,7 @@ impl TreeSink for RcDom {
     }

     fn append_based_on_parent_node(
-        &mut self,
+        &self,
         element: &Self::Handle,
         prev_element: &Self::Handle,
         child: NodeOrText<Self::Handle>,
@@ -366,7 +362,7 @@ impl TreeSink for RcDom {
     }

     fn append_doctype_to_document(
-        &mut self,
+        &self,
         name: StrTendril,
         public_id: StrTendril,
         system_id: StrTendril,
@@ -381,7 +377,7 @@ impl TreeSink for RcDom {
         );
     }

-    fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) {
+    fn add_attrs_if_missing(&self, target: &Handle, attrs: Vec<Attribute>) {
         let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
             attrs.borrow_mut()
         } else {
@@ -399,11 +395,11 @@ impl TreeSink for RcDom {
         );
     }

-    fn remove_from_parent(&mut self, target: &Handle) {
+    fn remove_from_parent(&self, target: &Handle) {
         remove_from_parent(target);
     }

-    fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) {
+    fn reparent_children(&self, node: &Handle, new_parent: &Handle) {
         let mut children = node.children.borrow_mut();
         let mut new_children = new_parent.children.borrow_mut();
         for child in children.iter() {
@@ -433,8 +429,8 @@ impl Default for RcDom {
     fn default() -> RcDom {
         RcDom {
             document: Node::new(NodeData::Document),
-            errors: vec![],
-            quirks_mode: tree_builder::NoQuirks,
+            errors: Default::default(),
+            quirks_mode: Cell::new(tree_builder::NoQuirks),
         }
     }
 }
diff --git a/rcdom/tests/html-serializer.rs b/rcdom/tests/html-serializer.rs
index 1439048e..2c5e6f62 100644
--- a/rcdom/tests/html-serializer.rs
+++ b/rcdom/tests/html-serializer.rs
@@ -15,15 +15,16 @@ use html5ever::{parse_document, parse_fragment, serialize, QualName};
 use markup5ever::{local_name, namespace_url, ns};
 use markup5ever_rcdom::{RcDom, SerializableHandle};
+use std::cell::RefCell;
 use std::io;

-struct Tokens(Vec<Token>);
+struct Tokens(RefCell<Vec<Token>>);

 impl TokenSink for Tokens {
     type Handle = ();

-    fn process_token(&mut self, token: Token, _: u64) -> TokenSinkResult<()> {
-        self.0.push(token);
+    fn process_token(&self, token: Token, _: u64) -> TokenSinkResult<()> {
+        self.0.borrow_mut().push(token);
         TokenSinkResult::Continue
     }
 }
@@ -33,7 +34,7 @@ impl Serialize for Tokens {
     where
         S: Serializer,
     {
-        for t in self.0.iter() {
+        for t in self.0.borrow().iter() {
             match &t {
                 // TODO: check whether this is an IE conditional comment or a spec comment
                 Token::TagToken(tag) => {
@@ -71,7 +72,7 @@ fn tokenize_and_serialize(input: StrTendril) -> StrTendril {
         q.push_front(input);
         q
     };
-    let mut tokenizer = Tokenizer::new(Tokens(vec![]), Default::default());
+    let tokenizer = Tokenizer::new(Tokens(RefCell::new(vec![])), Default::default());
     let _ = tokenizer.feed(&input);
     tokenizer.end();
     let mut output = ::std::io::Cursor::new(vec![]);
diff --git a/rcdom/tests/html-tokenizer.rs b/rcdom/tests/html-tokenizer.rs
index f84e4f27..7507fd60 100644
--- a/rcdom/tests/html-tokenizer.rs
+++ b/rcdom/tests/html-tokenizer.rs
@@ -21,11 +21,12 @@ use html5ever::tokenizer::{Doctype, EndTag, StartTag, Tag};
 use html5ever::tokenizer::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts};
 use html5ever::{namespace_url, ns, Attribute, LocalName, QualName};
 use serde_json::{Map, Value};
+use std::cell::RefCell;
 use std::ffi::OsStr;
 use std::fs::File;
 use std::io::Read;
 use std::path::Path;
-use std::{char, env, mem};
+use std::{char, env};

 use util::runner::Test;

@@ -68,57 +69,57 @@ fn splits(s: &str, n: usize) -> Vec<Vec<StrTendril>> {
 }

 struct TokenLogger {
-    tokens: Vec<Token>,
-    errors: Vec<TestError>,
-    current_str: StrTendril,
+    tokens: RefCell<Vec<Token>>,
+    errors: RefCell<Vec<TestError>>,
+    current_str: RefCell<StrTendril>,
     exact_errors: bool,
 }

 impl TokenLogger {
     fn new(exact_errors: bool) -> TokenLogger {
         TokenLogger {
-            tokens: vec![],
-            errors: vec![],
-            current_str: StrTendril::new(),
+            tokens: RefCell::new(vec![]),
+            errors: RefCell::new(vec![]),
+            current_str: RefCell::new(StrTendril::new()),
             exact_errors,
         }
     }

     // Push anything other than character tokens
-    fn push(&mut self, token: Token) {
+    fn push(&self, token: Token) {
         self.finish_str();
-        self.tokens.push(token);
+        self.tokens.borrow_mut().push(token);
     }

-    fn finish_str(&mut self) {
-        if self.current_str.len() > 0 {
-            let s = mem::take(&mut self.current_str);
-            self.tokens.push(CharacterTokens(s));
+    fn finish_str(&self) {
+        if self.current_str.borrow().len() > 0 {
+            let s = self.current_str.take();
+            self.tokens.borrow_mut().push(CharacterTokens(s));
         }
     }

-    fn get_tokens(mut self) -> (Vec<Token>, Vec<TestError>) {
+    fn get_tokens(self) -> (Vec<Token>, Vec<TestError>) {
         self.finish_str();
-        (self.tokens, self.errors)
+        (self.tokens.take(), self.errors.take())
     }
 }

 impl TokenSink for TokenLogger {
     type Handle = ();

-    fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
+    fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
         match token {
             CharacterTokens(b) => {
-                self.current_str.push_slice(&b);
+                self.current_str.borrow_mut().push_slice(&b);
             },

             NullCharacterToken => {
-                self.current_str.push_char('\0');
+                self.current_str.borrow_mut().push_char('\0');
             },

             ParseError(_) => {
                 if self.exact_errors {
-                    self.errors.push(TestError);
+                    self.errors.borrow_mut().push(TestError);
                 }
             },

@@ -146,7 +147,7 @@ impl TokenSink for TokenLogger {
 fn tokenize(input: Vec<StrTendril>, opts: TokenizerOpts) -> (Vec<Token>, Vec<TestError>) {
     let sink = TokenLogger::new(opts.exact_errors);
-    let mut tok = Tokenizer::new(sink, opts);
+    let tok = Tokenizer::new(sink, opts);
     let buffer = BufferQueue::default();
     for chunk in input.into_iter() {
         buffer.push_back(chunk);
@@ -271,7 +272,7 @@ fn json_to_tokens(
 ) -> (Vec<Token>, Vec<TestError>) {
     // Use a TokenLogger so that we combine character tokens separated
     // by an ignored error.
-    let mut sink = TokenLogger::new(exact_errors);
+    let sink = TokenLogger::new(exact_errors);
     for tok in js_tokens.get_list().iter() {
         assert_eq!(
             sink.process_token(json_to_token(tok), 0),
diff --git a/rcdom/tests/html-tree-sink.rs b/rcdom/tests/html-tree-sink.rs
index 2834863f..c5d05ae2 100644
--- a/rcdom/tests/html-tree-sink.rs
+++ b/rcdom/tests/html-tree-sink.rs
@@ -7,10 +7,11 @@ use markup5ever::interface::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
 use markup5ever::{local_name, namespace_url, ns, Attribute};
 use markup5ever_rcdom::{Handle, RcDom};
 use std::borrow::Cow;
+use std::cell::{Cell, RefCell};

 pub struct LineCountingDOM {
-    pub line_vec: Vec<(QualName, u64)>,
-    pub current_line: u64,
+    pub line_vec: RefCell<Vec<(QualName, u64)>>,
+    pub current_line: Cell<u64>,
     pub rcdom: RcDom,
 }

@@ -23,19 +24,19 @@ impl TreeSink for LineCountingDOM {
     type Handle = Handle;

-    fn parse_error(&mut self, msg: Cow<'static, str>) {
+    fn parse_error(&self, msg: Cow<'static, str>) {
         self.rcdom.parse_error(msg);
     }

-    fn get_document(&mut self) -> Handle {
+    fn get_document(&self) -> Handle {
         self.rcdom.get_document()
     }

-    fn get_template_contents(&mut self, target: &Handle) -> Handle {
+    fn get_template_contents(&self, target: &Handle) -> Handle {
         self.rcdom.get_template_contents(target)
     }

-    fn set_quirks_mode(&mut self, mode: QuirksMode) {
+    fn set_quirks_mode(&self, mode: QuirksMode) {
         self.rcdom.set_quirks_mode(mode)
     }

@@ -47,34 +48,31 @@ impl TreeSink for LineCountingDOM {
         self.rcdom.elem_name(target)
     }

-    fn create_element(
-        &mut self,
-        name: QualName,
-        attrs: Vec<Attribute>,
-        flags: ElementFlags,
-    ) -> Handle {
-        self.line_vec.push((name.clone(), self.current_line));
+    fn create_element(&self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Handle {
+        self.line_vec
+            .borrow_mut()
+            .push((name.clone(), self.current_line.get()));
         self.rcdom.create_element(name, attrs, flags)
     }

-    fn create_comment(&mut self, text: StrTendril) -> Handle {
+    fn create_comment(&self, text: StrTendril) -> Handle {
         self.rcdom.create_comment(text)
     }

-    fn create_pi(&mut self, target: StrTendril, content: StrTendril) -> Handle {
+    fn create_pi(&self, target: StrTendril, content: StrTendril) -> Handle {
         self.rcdom.create_pi(target, content)
     }

-    fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
+    fn append(&self, parent: &Handle, child: NodeOrText<Handle>) {
         self.rcdom.append(parent, child)
     }

-    fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) {
+    fn append_before_sibling(&self, sibling: &Handle, child: NodeOrText<Handle>) {
         self.rcdom.append_before_sibling(sibling, child)
     }

     fn append_based_on_parent_node(
-        &mut self,
+        &self,
         element: &Handle,
         prev_element: &Handle,
         child: NodeOrText<Handle>,
@@ -84,7 +82,7 @@ impl TreeSink for LineCountingDOM {
     }

     fn append_doctype_to_document(
-        &mut self,
+        &self,
         name: StrTendril,
         public_id: StrTendril,
         system_id: StrTendril,
@@ -93,24 +91,24 @@ impl TreeSink for LineCountingDOM {
             .append_doctype_to_document(name, public_id, system_id);
     }

-    fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) {
+    fn add_attrs_if_missing(&self, target: &Handle, attrs: Vec<Attribute>) {
         self.rcdom.add_attrs_if_missing(target, attrs);
     }

-    fn remove_from_parent(&mut self, target: &Handle) {
+    fn remove_from_parent(&self, target: &Handle) {
         self.rcdom.remove_from_parent(target);
     }

-    fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) {
+    fn reparent_children(&self, node: &Handle, new_parent: &Handle) {
         self.rcdom.reparent_children(node, new_parent);
     }

-    fn mark_script_already_started(&mut self, target: &Handle) {
+    fn mark_script_already_started(&self, target: &Handle) {
         self.rcdom.mark_script_already_started(target);
     }

-    fn set_current_line(&mut self, line_number: u64) {
-        self.current_line = line_number;
+    fn set_current_line(&self, line_number: u64) {
+        self.current_line.set(line_number);
     }
 }

@@ -118,8 +116,8 @@ impl TreeSink for LineCountingDOM {
 fn check_four_lines() {
     // Input
     let sink = LineCountingDOM {
-        line_vec: vec![],
-        current_line: 1,
+        line_vec: RefCell::new(vec![]),
+        current_line: Cell::new(1),
         rcdom: RcDom::default(),
     };
     let mut result_tok = driver::parse_document(sink, Default::default());
@@ -138,5 +136,5 @@ fn check_four_lines() {
         (QualName::new(None, ns!(html), local_name!("b")), 3),
     ];
     // Assertion
-    assert_eq!(actual.line_vec, expected);
+    assert_eq!(*actual.line_vec.borrow(), expected);
 }
diff --git a/rcdom/tests/xml-tokenizer.rs b/rcdom/tests/xml-tokenizer.rs
index a72cd791..5bcc7fcd 100644
--- a/rcdom/tests/xml-tokenizer.rs
+++ b/rcdom/tests/xml-tokenizer.rs
@@ -9,10 +9,11 @@ use serde_json::{Map, Value};

 use std::borrow::Cow::Borrowed;
+use std::cell::RefCell;
+use std::env;
 use std::ffi::OsStr;
 use std::io::Read;
 use std::path::Path;
-use std::{env, mem};

 use util::find_tests::foreach_xml5lib_test;
 use util::runner::Test;

@@ -56,48 +57,48 @@ fn splits(s: &str, n: usize) -> Vec<Vec<StrTendril>> {
 }

 struct TokenLogger {
-    tokens: Vec<Token>,
-    current_str: StrTendril,
+    tokens: RefCell<Vec<Token>>,
+    current_str: RefCell<StrTendril>,
     exact_errors: bool,
 }

 impl TokenLogger {
     fn new(exact_errors: bool) -> TokenLogger {
         TokenLogger {
-            tokens: vec![],
-            current_str: StrTendril::new(),
+            tokens: RefCell::new(vec![]),
+            current_str: RefCell::new(StrTendril::new()),
             exact_errors,
         }
     }

     // Push anything other than character tokens
-    fn push(&mut self, token: Token) {
+    fn push(&self, token: Token) {
         self.finish_str();
-        self.tokens.push(token);
+        self.tokens.borrow_mut().push(token);
     }

-    fn finish_str(&mut self) {
-        if self.current_str.len() > 0 {
-            let s = mem::take(&mut self.current_str);
-            self.tokens.push(CharacterTokens(s));
+    fn finish_str(&self) {
+        if self.current_str.borrow().len() > 0 {
+            let s = self.current_str.take();
+            self.tokens.borrow_mut().push(CharacterTokens(s));
         }
     }

-    fn get_tokens(mut self) -> Vec<Token> {
+    fn get_tokens(self) -> Vec<Token> {
         self.finish_str();
-        self.tokens
+        self.tokens.take()
     }
 }

 impl TokenSink for TokenLogger {
-    fn process_token(&mut self, token: Token) {
+    fn process_token(&self, token: Token) {
         match token {
             CharacterTokens(b) => {
-                self.current_str.push_slice(&b);
+                self.current_str.borrow_mut().push_slice(&b);
             },

             NullCharacterToken => {
-                self.current_str.push_char('\0');
+                self.current_str.borrow_mut().push_char('\0');
             },

             ParseError(_) => {
@@ -128,7 +129,7 @@ impl TokenSink for TokenLogger {
 fn tokenize_xml(input: Vec<StrTendril>, opts: XmlTokenizerOpts) -> Vec<Token> {
     let sink = TokenLogger::new(opts.exact_errors);
-    let mut tok = XmlTokenizer::new(sink, opts);
+    let tok = XmlTokenizer::new(sink, opts);
     let buf = BufferQueue::default();

     for chunk in input.into_iter() {
@@ -269,7 +270,7 @@ fn json_to_token(js: &Value) -> Token {
 fn json_to_tokens(js: &Value, exact_errors: bool) -> Vec<Token> {
     // Use a TokenLogger so that we combine character tokens separated
     // by an ignored error.
-    let mut sink = TokenLogger::new(exact_errors);
+    let sink = TokenLogger::new(exact_errors);
     for tok in js.as_array().unwrap().iter() {
         match *tok {
             Value::String(ref s) if &s[..] == "ParseError" => {
diff --git a/xml5ever/benches/xml5ever.rs b/xml5ever/benches/xml5ever.rs
index 14f3566c..74ce3a15 100644
--- a/xml5ever/benches/xml5ever.rs
+++ b/xml5ever/benches/xml5ever.rs
@@ -15,7 +15,7 @@ use xml5ever::tokenizer::{Token, TokenSink, XmlTokenizer};
 struct Sink;

 impl TokenSink for Sink {
-    fn process_token(&mut self, token: Token) {
+    fn process_token(&self, token: Token) {
         // Don't use the token, but make sure we don't get
         // optimized out entirely.
         black_box(token);
@@ -52,7 +52,7 @@ fn run_bench(c: &mut Criterion, name: &str) {
     c.bench_function(&test_name, move |b| {
         b.iter(|| {
-            let mut tok = XmlTokenizer::new(Sink, Default::default());
+            let tok = XmlTokenizer::new(Sink, Default::default());
             let buffer = BufferQueue::default();
             // We are doing clone inside the bench function, this is not ideal, but possibly
             // necessary since our iterator consumes the underlying buffer.
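The sink-side changes above all follow one recipe: token buffers move behind `RefCell`, `process_token` takes `&self`, and the tokenizer binding no longer needs to be `mut`. As a rough sketch only (the `CollectorSink` name and the input string are made up, and the `StrTendril` import path is assumed), a minimal xml5ever sink written against the post-patch signatures could look like this:

```rust
use std::cell::RefCell;

use markup5ever::buffer_queue::BufferQueue;
use xml5ever::tendril::StrTendril; // import path assumed; adjust to your tree's re-export
use xml5ever::tokenizer::{Token, TokenSink, XmlTokenizer};

// Hypothetical sink mirroring the TokenLogger conversion above:
// tokens are buffered behind a RefCell so `process_token` can take `&self`.
struct CollectorSink(RefCell<Vec<Token>>);

impl TokenSink for CollectorSink {
    fn process_token(&self, token: Token) {
        // Interior mutability replaces the old `&mut self` receiver.
        self.0.borrow_mut().push(token);
    }
}

fn main() {
    let input = BufferQueue::default();
    input.push_back(StrTendril::from_slice("<root a='1'>text</root>"));

    // The tokenizer itself no longer needs a `mut` binding.
    let tok = XmlTokenizer::new(CollectorSink(RefCell::new(Vec::new())), Default::default());
    tok.feed(&input);
    tok.end();

    // The sink stays reachable through the public `sink` field.
    println!("collected {} tokens", tok.sink.0.borrow().len());
}
```

The same shape applies on the html5ever side, except that `process_token` also receives a line number and returns a `TokenSinkResult`.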
diff --git a/xml5ever/examples/simple_xml_tokenizer.rs b/xml5ever/examples/simple_xml_tokenizer.rs index 002c236c..fc4b7b1a 100644 --- a/xml5ever/examples/simple_xml_tokenizer.rs +++ b/xml5ever/examples/simple_xml_tokenizer.rs @@ -24,7 +24,7 @@ use xml5ever::tokenizer::{ParseError, Token, TokenSink, XmlTokenizer}; struct SimpleTokenPrinter; impl TokenSink for SimpleTokenPrinter { - fn process_token(&mut self, token: Token) { + fn process_token(&self, token: Token) { match token { CharacterTokens(b) => { println!("TEXT: {}", &*b); @@ -75,7 +75,7 @@ fn main() { let input_buffer = BufferQueue::default(); input_buffer.push_back(input.try_reinterpret().unwrap()); // Here we create and run tokenizer - let mut tok = XmlTokenizer::new(sink, Default::default()); + let tok = XmlTokenizer::new(sink, Default::default()); tok.feed(&input_buffer); tok.end(); } diff --git a/xml5ever/examples/xml_tokenizer.rs b/xml5ever/examples/xml_tokenizer.rs index 63d27cb5..2776ed50 100644 --- a/xml5ever/examples/xml_tokenizer.rs +++ b/xml5ever/examples/xml_tokenizer.rs @@ -12,6 +12,7 @@ extern crate markup5ever; extern crate xml5ever; +use std::cell::Cell; use std::io; use markup5ever::buffer_queue::BufferQueue; @@ -21,29 +22,29 @@ use xml5ever::tokenizer::{EmptyTag, EndTag, ShortTag, StartTag}; use xml5ever::tokenizer::{PIToken, Pi}; use xml5ever::tokenizer::{ParseError, Token, TokenSink, XmlTokenizer, XmlTokenizerOpts}; -#[derive(Copy, Clone)] +#[derive(Clone)] struct TokenPrinter { - in_char_run: bool, + in_char_run: Cell<bool>, } impl TokenPrinter { - fn is_char(&mut self, is_char: bool) { - match (self.in_char_run, is_char) { + fn is_char(&self, is_char: bool) { + match (self.in_char_run.get(), is_char) { (false, true) => print!("CHAR : \""), (true, false) => println!("\""), _ => (), } - self.in_char_run = is_char; + self.in_char_run.set(is_char); } - fn do_char(&mut self, c: char) { + fn do_char(&self, c: char) { self.is_char(true); print!("{}", c.escape_default().collect::<String>()); } } impl TokenSink for TokenPrinter { - fn process_token(&mut self, token: Token) { + fn process_token(&self, token: Token) { match token { CharacterTokens(b) => { for c in b.chars() { @@ -88,13 +89,15 @@ impl TokenSink for TokenPrinter { } fn main() { - let mut sink = TokenPrinter { in_char_run: false }; + let sink = TokenPrinter { + in_char_run: Cell::new(false), + }; let mut input = ByteTendril::new(); io::stdin().read_to_tendril(&mut input).unwrap(); let input_buffer = BufferQueue::default(); input_buffer.push_back(input.try_reinterpret().unwrap()); - let mut tok = XmlTokenizer::new( + let tok = XmlTokenizer::new( sink, XmlTokenizerOpts { profile: true, @@ -104,5 +107,5 @@ fn main() { ); tok.feed(&input_buffer); tok.end(); - sink.is_char(false); + tok.sink.is_char(false); } diff --git a/xml5ever/src/driver.rs b/xml5ever/src/driver.rs index 7cb71c19..0245431d 100644 --- a/xml5ever/src/driver.rs +++ b/xml5ever/src/driver.rs @@ -71,7 +71,7 @@ impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for XmlParser<Sink> { self.tokenizer.sink.sink.parse_error(desc) } - fn finish(mut self) -> Self::Output { + fn finish(self) -> Self::Output { self.tokenizer.end(); self.tokenizer.sink.sink.finish() } diff --git a/xml5ever/src/lib.rs b/xml5ever/src/lib.rs index 4f990cdc..9d7d2557 100644 --- a/xml5ever/src/lib.rs +++ b/xml5ever/src/lib.rs @@ -30,6 +30,7 @@ #![crate_name = "xml5ever"] #![crate_type = "dylib"] +#![allow(unexpected_cfgs)] #![deny(missing_docs)] pub use markup5ever::*; diff --git a/xml5ever/src/tokenizer/char_ref/mod.rs 
b/xml5ever/src/tokenizer/char_ref/mod.rs index ecb6bfe4..84e5e70f 100644 --- a/xml5ever/src/tokenizer/char_ref/mod.rs +++ b/xml5ever/src/tokenizer/char_ref/mod.rs @@ -115,7 +115,7 @@ impl CharRefTokenizer { impl CharRefTokenizer { pub fn step<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, input: &BufferQueue, ) -> Status { if self.result.is_some() { @@ -135,7 +135,7 @@ impl CharRefTokenizer { fn do_begin<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, input: &BufferQueue, ) -> Status { match unwrap_or_return!(tokenizer.peek(input), Stuck) { @@ -158,7 +158,7 @@ impl CharRefTokenizer { fn do_octothorpe<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, input: &BufferQueue, ) -> Status { let c = unwrap_or_return!(tokenizer.peek(input), Stuck); @@ -179,7 +179,7 @@ impl CharRefTokenizer { fn do_numeric<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, base: u32, input: &BufferQueue, ) -> Status { @@ -209,7 +209,7 @@ impl CharRefTokenizer { fn do_numeric_semicolon<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, input: &BufferQueue, ) -> Status { match unwrap_or_return!(tokenizer.peek(input), Stuck) { @@ -223,7 +223,7 @@ impl CharRefTokenizer { fn unconsume_numeric<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, input: &BufferQueue, ) -> Status { let mut unconsume = StrTendril::from_char('#'); @@ -236,7 +236,7 @@ impl CharRefTokenizer { self.finish_none() } - fn finish_numeric<Sink: TokenSink>(&mut self, tokenizer: &mut XmlTokenizer<Sink>) -> Status { + fn finish_numeric<Sink: TokenSink>(&mut self, tokenizer: &XmlTokenizer<Sink>) -> Status { fn conv(n: u32) -> char { from_u32(n).expect("invalid char missed by error handling cases") } @@ -272,7 +272,7 @@ impl CharRefTokenizer { fn do_named<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, input: &BufferQueue, ) -> Status { let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); @@ -294,7 +294,7 @@ impl CharRefTokenizer { } } - fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &mut XmlTokenizer<Sink>) { + fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &XmlTokenizer<Sink>) { let msg = format_if!( tokenizer.opts.exact_errors, "Invalid character reference", @@ -306,7 +306,7 @@ impl CharRefTokenizer { fn unconsume_name<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, input: &BufferQueue, ) { tokenizer.unconsume(input, self.name_buf_opt.take().unwrap()); @@ -314,7 +314,7 @@ impl CharRefTokenizer { fn finish_named<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, end_char: Option<char>, input: &BufferQueue, ) -> Status { @@ -403,7 +403,7 @@ impl CharRefTokenizer { fn do_bogus_name<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, input: &BufferQueue, ) -> Status { let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); @@ -419,7 +419,7 @@ impl CharRefTokenizer { pub fn end_of_file<Sink: TokenSink>( &mut self, - tokenizer: &mut XmlTokenizer<Sink>, + tokenizer: &XmlTokenizer<Sink>, input: &BufferQueue, ) { while self.result.is_none() { diff --git a/xml5ever/src/tokenizer/interface.rs 
b/xml5ever/src/tokenizer/interface.rs index c2dad9be..802eef33 100644 --- a/xml5ever/src/tokenizer/interface.rs +++ b/xml5ever/src/tokenizer/interface.rs @@ -109,15 +109,15 @@ pub enum Token { /// Types which can receive tokens from the tokenizer. pub trait TokenSink { /// Process a token. - fn process_token(&mut self, token: Token); + fn process_token(&self, token: Token); /// Signal to the sink that parsing has ended. - fn end(&mut self) {} + fn end(&self) {} /// The tokenizer will call this after emitting any start tag. /// This allows the tree builder to change the tokenizer's state. /// By default no state changes occur. - fn query_state_change(&mut self) -> Option<states::XmlState> { + fn query_state_change(&self) -> Option<states::XmlState> { None } } diff --git a/xml5ever/src/tokenizer/mod.rs b/xml5ever/src/tokenizer/mod.rs index fda51d57..84f296ec 100644 --- a/xml5ever/src/tokenizer/mod.rs +++ b/xml5ever/src/tokenizer/mod.rs @@ -25,8 +25,9 @@ use log::debug; use mac::{format_if, unwrap_or_return}; use markup5ever::{local_name, namespace_prefix, namespace_url, ns, small_char_set}; use std::borrow::Cow::{self, Borrowed}; +use std::cell::{Cell, RefCell, RefMut}; use std::collections::BTreeMap; -use std::mem::{self, replace}; +use std::mem::replace; use self::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult}; use self::char_ref::{CharRef, CharRefTokenizer}; @@ -105,64 +106,64 @@ pub struct XmlTokenizer<Sink> { pub sink: Sink, /// The abstract machine state as described in the spec. - state: states::XmlState, + state: Cell<states::XmlState>, /// Are we at the end of the file, once buffers have been processed /// completely? This affects whether we will wait for lookahead or not. - at_eof: bool, + at_eof: Cell<bool>, /// Tokenizer for character references, if we're tokenizing /// one at the moment. - char_ref_tokenizer: Option<Box<CharRefTokenizer>>, + char_ref_tokenizer: RefCell<Option<Box<CharRefTokenizer>>>, /// Current input character. Just consumed, may reconsume. - current_char: char, + current_char: Cell<char>, /// Should we reconsume the current input character? - reconsume: bool, + reconsume: Cell<bool>, /// Did we just consume \r, translating it to \n? In that case we need /// to ignore the next character if it's \n. - ignore_lf: bool, + ignore_lf: Cell<bool>, /// Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the /// beginning of the stream. - discard_bom: bool, + discard_bom: Cell<bool>, /// Temporary buffer - temp_buf: StrTendril, + temp_buf: RefCell<StrTendril>, /// Current tag kind. - current_tag_kind: TagKind, + current_tag_kind: Cell<TagKind>, /// Current tag name. - current_tag_name: StrTendril, + current_tag_name: RefCell<StrTendril>, /// Current tag attributes. - current_tag_attrs: Vec<Attribute>, + current_tag_attrs: RefCell<Vec<Attribute>>, /// Current attribute name. - current_attr_name: StrTendril, + current_attr_name: RefCell<StrTendril>, /// Current attribute value. - current_attr_value: StrTendril, + current_attr_value: RefCell<StrTendril>, - current_doctype: Doctype, + current_doctype: RefCell<Doctype>, /// Current comment. - current_comment: StrTendril, + current_comment: RefCell<StrTendril>, /// Current processing instruction target. - current_pi_target: StrTendril, + current_pi_target: RefCell<StrTendril>, /// Current processing instruction value. - current_pi_data: StrTendril, + current_pi_data: RefCell<StrTendril>, /// Record of how many ns we spent in each state, if profiling is enabled. 
- state_profile: BTreeMap<states::XmlState, u64>, + state_profile: RefCell<BTreeMap<states::XmlState, u64>>, /// Record of how many ns we spent in the token sink. - time_in_sink: u64, + time_in_sink: Cell<u64>, } impl<Sink: TokenSink> XmlTokenizer<Sink> { @@ -177,35 +178,35 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { XmlTokenizer { opts, sink, - state, - char_ref_tokenizer: None, - at_eof: false, - current_char: '\0', - reconsume: false, - ignore_lf: false, - temp_buf: StrTendril::new(), - discard_bom, - current_tag_kind: StartTag, - current_tag_name: StrTendril::new(), - current_tag_attrs: vec![], - current_attr_name: StrTendril::new(), - current_attr_value: StrTendril::new(), - current_comment: StrTendril::new(), - current_pi_data: StrTendril::new(), - current_pi_target: StrTendril::new(), - current_doctype: Doctype::default(), - state_profile: BTreeMap::new(), - time_in_sink: 0, + state: Cell::new(state), + char_ref_tokenizer: RefCell::new(None), + at_eof: Cell::new(false), + current_char: Cell::new('\0'), + reconsume: Cell::new(false), + ignore_lf: Cell::new(false), + temp_buf: RefCell::new(StrTendril::new()), + discard_bom: Cell::new(discard_bom), + current_tag_kind: Cell::new(StartTag), + current_tag_name: RefCell::new(StrTendril::new()), + current_tag_attrs: RefCell::new(vec![]), + current_attr_name: RefCell::new(StrTendril::new()), + current_attr_value: RefCell::new(StrTendril::new()), + current_comment: RefCell::new(StrTendril::new()), + current_pi_data: RefCell::new(StrTendril::new()), + current_pi_target: RefCell::new(StrTendril::new()), + current_doctype: RefCell::new(Doctype::default()), + state_profile: RefCell::new(BTreeMap::new()), + time_in_sink: Cell::new(0), } } /// Feed an input string into the tokenizer. - pub fn feed(&mut self, input: &BufferQueue) { + pub fn feed(&self, input: &BufferQueue) { if input.is_empty() { return; } - if self.discard_bom { + if self.discard_bom.get() { if let Some(c) = input.peek() { if c == '\u{feff}' { input.next(); @@ -218,10 +219,10 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { self.run(input); } - fn process_token(&mut self, token: Token) { + fn process_token(&self, token: Token) { if self.opts.profile { let (_, dt) = time!(self.sink.process_token(token)); - self.time_in_sink += dt; + self.time_in_sink.set(self.time_in_sink.get() + dt); } else { self.sink.process_token(token); } @@ -229,16 +230,16 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { // Get the next input character, which might be the character // 'c' that we already consumed from the buffers. 
- fn get_preprocessed_char(&mut self, mut c: char, input: &BufferQueue) -> Option<char> { - if self.ignore_lf { - self.ignore_lf = false; + fn get_preprocessed_char(&self, mut c: char, input: &BufferQueue) -> Option<char> { + if self.ignore_lf.get() { + self.ignore_lf.set(false); if c == '\n' { c = unwrap_or_return!(input.next(), None); } } if c == '\r' { - self.ignore_lf = true; + self.ignore_lf.set(true); c = '\n'; } @@ -260,11 +261,11 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { } debug!("got character {}", c); - self.current_char = c; + self.current_char.set(c); Some(c) } - fn bad_eof_error(&mut self) { + fn bad_eof_error(&self) { let msg = format_if!( self.opts.exact_errors, "Unexpected EOF", @@ -274,12 +275,12 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { self.emit_error(msg); } - fn pop_except_from(&mut self, input: &BufferQueue, set: SmallCharSet) -> Option<SetResult> { + fn pop_except_from(&self, input: &BufferQueue, set: SmallCharSet) -> Option<SetResult> { // Bail to the slow path for various corner cases. // This means that `FromSet` can contain characters not in the set! // It shouldn't matter because the fallback `FromSet` case should // always do the same thing as the `NotFromSet` case. - if self.opts.exact_errors || self.reconsume || self.ignore_lf { + if self.opts.exact_errors || self.reconsume.get() || self.ignore_lf.get() { return self.get_char(input).map(FromSet); } @@ -300,13 +301,14 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { // // NB: this doesn't do input stream preprocessing or set the current input // character. - fn eat(&mut self, input: &BufferQueue, pat: &str) -> Option<bool> { - input.push_front(replace(&mut self.temp_buf, StrTendril::new())); + fn eat(&self, input: &BufferQueue, pat: &str) -> Option<bool> { + input.push_front(replace(&mut *self.temp_buf.borrow_mut(), StrTendril::new())); match input.eat(pat, u8::eq_ignore_ascii_case) { - None if self.at_eof => Some(false), + None if self.at_eof.get() => Some(false), None => { + let mut temp_buf = self.temp_buf.borrow_mut(); while let Some(data) = input.next() { - self.temp_buf.push_char(data); + temp_buf.push_char(data); } None }, @@ -315,14 +317,14 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { } /// Run the state machine for as long as we can. - pub fn run(&mut self, input: &BufferQueue) { + pub fn run(&self, input: &BufferQueue) { if self.opts.profile { loop { - let state = self.state; - let old_sink = self.time_in_sink; + let state = self.state.get(); + let old_sink = self.time_in_sink.get(); let (run, mut dt) = time!(self.step(input)); - dt -= self.time_in_sink - old_sink; - let new = match self.state_profile.get_mut(&state) { + dt -= self.time_in_sink.get() - old_sink; + let new = match self.state_profile.borrow_mut().get_mut(&state) { Some(x) => { *x += dt; false @@ -331,7 +333,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { }; if new { // do this here because of borrow shenanigans - self.state_profile.insert(state, dt); + self.state_profile.borrow_mut().insert(state, dt); } if !run { break; @@ -344,10 +346,10 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { //§ tokenization // Get the next input character, if one is available. 
- fn get_char(&mut self, input: &BufferQueue) -> Option<char> { - if self.reconsume { - self.reconsume = false; - Some(self.current_char) + fn get_char(&self, input: &BufferQueue) -> Option<char> { + if self.reconsume.get() { + self.reconsume.set(false); + Some(self.current_char.get()) } else { input .next() @@ -355,162 +357,167 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { } } - fn bad_char_error(&mut self) { + fn bad_char_error(&self) { let msg = format_if!( self.opts.exact_errors, "Bad character", "Saw {} in state {:?}", - self.current_char, - self.state + self.current_char.get(), + self.state.get() ); self.emit_error(msg); } - fn discard_tag(&mut self) { - self.current_tag_name = StrTendril::new(); - self.current_tag_attrs = Vec::new(); + fn discard_tag(&self) { + *self.current_tag_name.borrow_mut() = StrTendril::new(); + *self.current_tag_attrs.borrow_mut() = Vec::new(); } - fn create_tag(&mut self, kind: TagKind, c: char) { + fn create_tag(&self, kind: TagKind, c: char) { self.discard_tag(); - self.current_tag_name.push_char(c); - self.current_tag_kind = kind; + self.current_tag_name.borrow_mut().push_char(c); + self.current_tag_kind.set(kind); } // This method creates a PI token and // sets its target to given char - fn create_pi(&mut self, c: char) { - self.current_pi_target = StrTendril::new(); - self.current_pi_data = StrTendril::new(); - self.current_pi_target.push_char(c); + fn create_pi(&self, c: char) { + *self.current_pi_target.borrow_mut() = StrTendril::new(); + *self.current_pi_data.borrow_mut() = StrTendril::new(); + self.current_pi_target.borrow_mut().push_char(c); } - fn emit_char(&mut self, c: char) { + fn emit_char(&self, c: char) { self.process_token(CharacterTokens(StrTendril::from_char(match c { '\0' => '\u{FFFD}', c => c, }))); } - fn emit_short_tag(&mut self) { - self.current_tag_kind = ShortTag; - self.current_tag_name = StrTendril::new(); + fn emit_short_tag(&self) { + self.current_tag_kind.set(ShortTag); + *self.current_tag_name.borrow_mut() = StrTendril::new(); self.emit_current_tag(); } - fn emit_empty_tag(&mut self) { - self.current_tag_kind = EmptyTag; + fn emit_empty_tag(&self) { + self.current_tag_kind.set(EmptyTag); self.emit_current_tag(); } - fn set_empty_tag(&mut self) { - self.current_tag_kind = EmptyTag; + fn set_empty_tag(&self) { + self.current_tag_kind.set(EmptyTag); } - fn emit_start_tag(&mut self) { - self.current_tag_kind = StartTag; + fn emit_start_tag(&self) { + self.current_tag_kind.set(StartTag); self.emit_current_tag(); } - fn emit_current_tag(&mut self) { + fn emit_current_tag(&self) { self.finish_attribute(); - let qname = process_qname(replace(&mut self.current_tag_name, StrTendril::new())); + let qname = process_qname(replace( + &mut *self.current_tag_name.borrow_mut(), + StrTendril::new(), + )); - match self.current_tag_kind { + match self.current_tag_kind.get() { StartTag | EmptyTag => {}, EndTag => { - if !self.current_tag_attrs.is_empty() { + if !self.current_tag_attrs.borrow().is_empty() { self.emit_error(Borrowed("Attributes on an end tag")); } }, ShortTag => { - if !self.current_tag_attrs.is_empty() { + if !self.current_tag_attrs.borrow().is_empty() { self.emit_error(Borrowed("Attributes on a short tag")); } }, } let token = TagToken(Tag { - kind: self.current_tag_kind, + kind: self.current_tag_kind.get(), name: qname, - attrs: mem::take(&mut self.current_tag_attrs), + attrs: self.current_tag_attrs.take(), }); self.process_token(token); match self.sink.query_state_change() { None => (), - Some(s) => self.state = s, + Some(s) 
=> self.state.set(s), } } // The string must not contain '\0'! - fn emit_chars(&mut self, b: StrTendril) { + fn emit_chars(&self, b: StrTendril) { self.process_token(CharacterTokens(b)); } // Emits the current Processing Instruction - fn emit_pi(&mut self) { + fn emit_pi(&self) { let token = PIToken(Pi { - target: replace(&mut self.current_pi_target, StrTendril::new()), - data: replace(&mut self.current_pi_data, StrTendril::new()), + target: replace(&mut *self.current_pi_target.borrow_mut(), StrTendril::new()), + data: replace(&mut *self.current_pi_data.borrow_mut(), StrTendril::new()), }); self.process_token(token); } - fn consume_char_ref(&mut self, addnl_allowed: Option<char>) { + fn consume_char_ref(&self, addnl_allowed: Option<char>) { // NB: The char ref tokenizer assumes we have an additional allowed // character iff we're tokenizing in an attribute value. - self.char_ref_tokenizer = Some(Box::new(CharRefTokenizer::new(addnl_allowed))); + *self.char_ref_tokenizer.borrow_mut() = + Some(Box::new(CharRefTokenizer::new(addnl_allowed))); } - fn emit_eof(&mut self) { + fn emit_eof(&self) { self.process_token(EOFToken); } - fn emit_error(&mut self, error: Cow<'static, str>) { + fn emit_error(&self, error: Cow<'static, str>) { self.process_token(ParseError(error)); } - fn emit_current_comment(&mut self) { - let comment = mem::take(&mut self.current_comment); + fn emit_current_comment(&self) { + let comment = self.current_comment.take(); self.process_token(CommentToken(comment)); } - fn emit_current_doctype(&mut self) { - let doctype = mem::take(&mut self.current_doctype); + fn emit_current_doctype(&self) { + let doctype = self.current_doctype.take(); self.process_token(DoctypeToken(doctype)); } - fn doctype_id(&mut self, kind: DoctypeKind) -> &mut Option<StrTendril> { + fn doctype_id(&self, kind: DoctypeKind) -> RefMut<Option<StrTendril>> { + let current_doctype = self.current_doctype.borrow_mut(); match kind { - Public => &mut self.current_doctype.public_id, - System => &mut self.current_doctype.system_id, + Public => RefMut::map(current_doctype, |d| &mut d.public_id), + System => RefMut::map(current_doctype, |d| &mut d.system_id), } } - fn clear_doctype_id(&mut self, kind: DoctypeKind) { - let id = self.doctype_id(kind); + fn clear_doctype_id(&self, kind: DoctypeKind) { + let mut id = self.doctype_id(kind); match *id { Some(ref mut s) => s.clear(), None => *id = Some(StrTendril::new()), } } - fn peek(&mut self, input: &BufferQueue) -> Option<char> { - if self.reconsume { - Some(self.current_char) + fn peek(&self, input: &BufferQueue) -> Option<char> { + if self.reconsume.get() { + Some(self.current_char.get()) } else { input.peek() } } - fn discard_char(&mut self, input: &BufferQueue) { + fn discard_char(&self, input: &BufferQueue) { let c = self.get_char(input); assert!(c.is_some()); } - fn unconsume(&mut self, input: &BufferQueue, buf: StrTendril) { + fn unconsume(&self, input: &BufferQueue, buf: StrTendril) { input.push_front(buf); } } @@ -519,30 +526,30 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { macro_rules! 
shorthand ( ( $me:ident : emit $c:expr ) => ( $me.emit_char($c) ); ( $me:ident : create_tag $kind:ident $c:expr ) => ( $me.create_tag($kind, $c) ); - ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push_char($c) ); + ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.borrow_mut().push_char($c) ); ( $me:ident : discard_tag $input:expr ) => ( $me.discard_tag($input) ); ( $me:ident : discard_char ) => ( $me.discard_char() ); - ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push_char($c) ); + ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.borrow_mut().push_char($c) ); ( $me:ident : emit_temp ) => ( $me.emit_temp_buf() ); ( $me:ident : clear_temp ) => ( $me.clear_temp_buf() ); ( $me:ident : create_attr $c:expr ) => ( $me.create_attribute($c) ); - ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.push_char($c) ); - ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.push_char($c) ); - ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.push_tendril($c) ); - ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.push_char($c) ); - ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_slice($c) ); + ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.borrow_mut().push_char($c) ); + ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.borrow_mut().push_char($c) ); + ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.borrow_mut().push_tendril($c)); + ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.borrow_mut().push_char($c) ); + ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.borrow_mut().push_slice($c) ); ( $me:ident : emit_comment ) => ( $me.emit_current_comment() ); - ( $me:ident : clear_comment ) => ( $me.current_comment.clear() ); - ( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::default() ); - ( $me:ident : push_doctype_name $c:expr ) => ( option_push(&mut $me.current_doctype.name, $c) ); - ( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push($me.doctype_id($k), $c) ); + ( $me:ident : clear_comment ) => ( $me.current_comment.borrow_mut().clear() ); + ( $me:ident : create_doctype ) => ( *$me.current_doctype.borrow_mut() = Doctype::default() ); + ( $me:ident : push_doctype_name $c:expr ) => ( option_push(&mut $me.current_doctype.borrow_mut().name, $c) ); + ( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push(&mut $me.doctype_id($k), $c) ); ( $me:ident : clear_doctype_id $k:ident ) => ( $me.clear_doctype_id($k) ); ( $me:ident : emit_doctype ) => ( $me.emit_current_doctype() ); ( $me:ident : error ) => ( $me.bad_char_error() ); ( $me:ident : error_eof ) => ( $me.bad_eof_error() ); ( $me:ident : create_pi $c:expr ) => ( $me.create_pi($c) ); - ( $me:ident : push_pi_target $c:expr ) => ( $me.current_pi_target.push_char($c) ); - ( $me:ident : push_pi_data $c:expr ) => ( $me.current_pi_data.push_char($c) ); + ( $me:ident : push_pi_target $c:expr ) => ( $me.current_pi_target.borrow_mut().push_char($c) ); + ( $me:ident : push_pi_data $c:expr ) => ( $me.current_pi_data.borrow_mut().push_char($c) ); ( $me:ident : set_empty_tag ) => ( $me.set_empty_tag() ); ); @@ -569,45 +576,45 @@ macro_rules! go ( // These can only come at the end. 
- ( $me:ident : to $s:ident ) => ({ $me.state = states::$s; return true; }); - ( $me:ident : to $s:ident $k1:expr ) => ({ $me.state = states::$s($k1); return true; }); - ( $me:ident : to $s:ident $k1:ident $k2:expr ) => ({ $me.state = states::$s($k1($k2)); return true; }); + ( $me:ident : to $s:ident ) => ({ $me.state.set(states::$s); return true; }); + ( $me:ident : to $s:ident $k1:expr ) => ({ $me.state.set(states::$s($k1)); return true; }); + ( $me:ident : to $s:ident $k1:ident $k2:expr ) => ({ $me.state.set(states::$s($k1($k2))); return true; }); - ( $me:ident : reconsume $s:ident ) => ({ $me.reconsume = true; go!($me: to $s); }); - ( $me:ident : reconsume $s:ident $k1:expr ) => ({ $me.reconsume = true; go!($me: to $s $k1); }); - ( $me:ident : reconsume $s:ident $k1:ident $k2:expr ) => ({ $me.reconsume = true; go!($me: to $s $k1 $k2); }); + ( $me:ident : reconsume $s:ident ) => ({ $me.reconsume.set(true); go!($me: to $s); }); + ( $me:ident : reconsume $s:ident $k1:expr ) => ({ $me.reconsume.set(true); go!($me: to $s $k1); }); + ( $me:ident : reconsume $s:ident $k1:ident $k2:expr ) => ({ $me.reconsume.set(true); go!($me: to $s $k1 $k2); }); ( $me:ident : consume_char_ref ) => ({ $me.consume_char_ref(None); return true; }); ( $me:ident : consume_char_ref $addnl:expr ) => ({ $me.consume_char_ref(Some($addnl)); return true; }); // We have a default next state after emitting a tag, but the sink can override. ( $me:ident : emit_tag $s:ident ) => ({ - $me.state = states::$s; + $me.state.set(states::$s); $me.emit_current_tag(); return true; }); // We have a special when dealing with empty and short tags in Xml ( $me:ident : emit_short_tag $s:ident ) => ({ - $me.state = states::$s; + $me.state.set(states::$s); $me.emit_short_tag(); return true; }); ( $me:ident : emit_empty_tag $s:ident ) => ({ - $me.state = states::$s; + $me.state.set(states::$s); $me.emit_empty_tag(); return true; }); ( $me:ident : emit_start_tag $s:ident ) => ({ - $me.state = states::$s; + $me.state.set(states::$s); $me.emit_start_tag(); return true; }); ( $me:ident : emit_pi $s:ident ) => ({ - $me.state = states::$s; + $me.state.set(states::$s); $me.emit_pi(); return true; }); @@ -640,15 +647,15 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { // Return true if we should be immediately re-invoked // (this just simplifies control flow vs. break / continue). #[allow(clippy::never_loop)] - fn step(&mut self, input: &BufferQueue) -> bool { - if self.char_ref_tokenizer.is_some() { + fn step(&self, input: &BufferQueue) -> bool { + if self.char_ref_tokenizer.borrow().is_some() { return self.step_char_ref_tokenizer(input); } debug!("processing in state {:?}", self.state); - match self.state { + match self.state.get() { XmlState::Quiescent => { - self.state = XmlState::Data; + self.state.set(XmlState::Data); false }, //§ data-state @@ -1078,7 +1085,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { } /// Indicate that we have reached the end of the input. - pub fn end(&mut self) { + pub fn end(&self) { // Handle EOF in the char ref sub-tokenizer, if there is one. // Do this first because it might un-consume stuff. let input = BufferQueue::default(); @@ -1092,7 +1099,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { // Process all remaining buffered input. // If we're waiting for lookahead, we're not gonna get it. 
- self.at_eof = true; + self.at_eof.set(true); self.run(&input); while self.eof_step() { @@ -1113,8 +1120,12 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { #[cfg(not(for_c))] fn dump_profile(&self) { - let mut results: Vec<(states::XmlState, u64)> = - self.state_profile.iter().map(|(s, t)| (*s, *t)).collect(); + let mut results: Vec<(states::XmlState, u64)> = self + .state_profile + .borrow() + .iter() + .map(|(s, t)| (*s, *t)) + .collect(); results.sort_by(|&(_, x), &(_, y)| y.cmp(&x)); let total: u64 = results @@ -1122,7 +1133,10 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { .map(|&(_, t)| t) .fold(0, ::std::ops::Add::add); debug!("\nTokenizer profile, in nanoseconds"); - debug!("\n{:12} total in token sink", self.time_in_sink); + debug!( + "\n{:12} total in token sink", + self.time_in_sink.get() + ); debug!("\n{:12} total in tokenizer", total); for (k, v) in results.into_iter() { @@ -1131,9 +1145,9 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { } } - fn eof_step(&mut self) -> bool { - debug!("processing EOF in state {:?}", self.state); - match self.state { + fn eof_step(&self) -> bool { + debug!("processing EOF in state {:?}", self.state.get()); + match self.state.get() { XmlState::Data | XmlState::Quiescent => go!(self: eof), XmlState::CommentStart | XmlState::CommentLessThan | XmlState::CommentLessThanBang => { go!(self: reconsume Comment) @@ -1180,7 +1194,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { } } - fn process_char_ref(&mut self, char_ref: CharRef) { + fn process_char_ref(&self, char_ref: CharRef) { let CharRef { mut chars, mut num_chars, @@ -1193,20 +1207,20 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { for i in 0..num_chars { let c = chars[i as usize]; - match self.state { + match self.state.get() { states::Data | states::Cdata => go!(self: emit c), states::TagAttrValue(_) => go!(self: push_value c), _ => panic!( "state {:?} should not be reachable in process_char_ref", - self.state + self.state.get() ), } } } - fn step_char_ref_tokenizer(&mut self, input: &BufferQueue) -> bool { + fn step_char_ref_tokenizer(&self, input: &BufferQueue) -> bool { let mut tok = self.char_ref_tokenizer.take().unwrap(); let outcome = tok.step(self, input); @@ -1220,12 +1234,12 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { char_ref::Progress => true, }; - self.char_ref_tokenizer = Some(tok); + *self.char_ref_tokenizer.borrow_mut() = Some(tok); progress } - fn finish_attribute(&mut self) { - if self.current_attr_name.is_empty() { + fn finish_attribute(&self) { + if self.current_attr_name.borrow().is_empty() { return; } @@ -1233,37 +1247,42 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> { // FIXME: the spec says we should error as soon as the name is finished. // FIXME: linear time search, do we care? 
let dup = { - let name = &self.current_attr_name[..]; + let current_attr_name = self.current_attr_name.borrow(); + let name = &current_attr_name[..]; self.current_tag_attrs + .borrow() .iter() .any(|a| &*a.name.local == name) }; if dup { self.emit_error(Borrowed("Duplicate attribute")); - self.current_attr_name.clear(); - self.current_attr_value.clear(); + self.current_attr_name.borrow_mut().clear(); + self.current_attr_value.borrow_mut().clear(); } else { - let qname = process_qname(replace(&mut self.current_attr_name, StrTendril::new())); + let qname = process_qname(replace( + &mut self.current_attr_name.borrow_mut(), + StrTendril::new(), + )); let attr = Attribute { name: qname.clone(), - value: replace(&mut self.current_attr_value, StrTendril::new()), + value: replace(&mut self.current_attr_value.borrow_mut(), StrTendril::new()), }; if qname.local == local_name!("xmlns") || qname.prefix == Some(namespace_prefix!("xmlns")) { - self.current_tag_attrs.insert(0, attr); + self.current_tag_attrs.borrow_mut().insert(0, attr); } else { - self.current_tag_attrs.push(attr); + self.current_tag_attrs.borrow_mut().push(attr); } } } - fn create_attribute(&mut self, c: char) { + fn create_attribute(&self, c: char) { self.finish_attribute(); - self.current_attr_name.push_char(c); + self.current_attr_name.borrow_mut().push_char(c); } } diff --git a/xml5ever/src/tree_builder/mod.rs b/xml5ever/src/tree_builder/mod.rs index 4bf602fd..b76eff0b 100644 --- a/xml5ever/src/tree_builder/mod.rs +++ b/xml5ever/src/tree_builder/mod.rs @@ -14,6 +14,7 @@ use mac::unwrap_or_return; use markup5ever::{local_name, namespace_prefix, namespace_url, ns}; use std::borrow::Cow; use std::borrow::Cow::Borrowed; +use std::cell::{Cell, Ref, RefCell}; use std::collections::btree_map::Iter; use std::collections::{BTreeMap, HashSet, VecDeque}; use std::fmt::{Debug, Error, Formatter}; @@ -182,22 +183,22 @@ pub struct XmlTreeBuilder<Handle, Sink> { doc_handle: Handle, /// Next state change for the tokenizer, if any. - next_tokenizer_state: Option<tokenizer::states::XmlState>, + next_tokenizer_state: Cell<Option<tokenizer::states::XmlState>>, /// Stack of open elements, most recently added at end. - open_elems: Vec<Handle>, + open_elems: RefCell<Vec<Handle>>, /// Current element pointer. - curr_elem: Option<Handle>, + curr_elem: RefCell<Option<Handle>>, /// Stack of namespace identifiers and namespaces. - namespace_stack: NamespaceMapStack, + namespace_stack: RefCell<NamespaceMapStack>, /// Current namespace identifier - current_namespace: NamespaceMap, + current_namespace: RefCell<NamespaceMap>, /// Current tree builder phase. - phase: XmlPhase, + phase: Cell<XmlPhase>, } impl<Handle, Sink> XmlTreeBuilder<Handle, Sink> where @@ -207,18 +208,18 @@ where /// Create a new tree builder which sends tree modifications to a particular `TreeSink`. /// /// The tree builder is also a `TokenSink`. 
- pub fn new(mut sink: Sink, opts: XmlTreeBuilderOpts) -> XmlTreeBuilder<Handle, Sink> { + pub fn new(sink: Sink, opts: XmlTreeBuilderOpts) -> XmlTreeBuilder<Handle, Sink> { let doc_handle = sink.get_document(); XmlTreeBuilder { _opts: opts, sink, doc_handle, - next_tokenizer_state: None, - open_elems: vec![], - curr_elem: None, - namespace_stack: NamespaceMapStack::new(), - current_namespace: NamespaceMap::empty(), - phase: Start, + next_tokenizer_state: Cell::new(None), + open_elems: RefCell::new(vec![]), + curr_elem: RefCell::new(None), + namespace_stack: RefCell::new(NamespaceMapStack::new()), + current_namespace: RefCell::new(NamespaceMap::empty()), + phase: Cell::new(Start), } } @@ -226,10 +227,10 @@ where /// internal state. This is intended to support garbage-collected DOMs. pub fn trace_handles(&self, tracer: &dyn Tracer<Handle = Handle>) { tracer.trace_handle(&self.doc_handle); - for e in self.open_elems.iter() { + for e in self.open_elems.borrow().iter() { tracer.trace_handle(e); } - if let Some(h) = self.curr_elem.as_ref() { + if let Some(h) = self.curr_elem.borrow().as_ref() { tracer.trace_handle(h); } } @@ -240,7 +241,7 @@ where fn dump_state(&self, label: String) { debug!("dump_state on {}", label); debug!(" open_elems:"); - for node in self.open_elems.iter() { + for node in self.open_elems.borrow().iter() { debug!(" {:?}", self.sink.elem_name(node)); } debug!(""); @@ -258,8 +259,8 @@ where ); } - fn declare_ns(&mut self, attr: &mut Attribute) { - if let Err(msg) = self.current_namespace.insert_ns(attr) { + fn declare_ns(&self, attr: &mut Attribute) { + if let Err(msg) = self.current_namespace.borrow_mut().insert_ns(attr) { self.sink.parse_error(msg); } else { attr.name.ns = ns!(xmlns); @@ -269,11 +270,13 @@ where fn find_uri(&self, prefix: &Option<Prefix>) -> Result<Option<Namespace>, Cow<'static, str>> { let mut uri = Err(Borrowed("No appropriate namespace found")); + let current_namespace = self.current_namespace.borrow(); for ns in self .namespace_stack + .borrow() .0 .iter() - .chain(Some(&self.current_namespace)) + .chain(Some(&*current_namespace)) .rev() { if let Some(el) = ns.get(prefix) { @@ -284,7 +287,7 @@ where uri } - fn bind_qname(&mut self, name: &mut QualName) { + fn bind_qname(&self, name: &mut QualName) { match self.find_uri(&name.prefix) { Ok(uri) => { let ns_uri = match uri { @@ -304,7 +307,7 @@ where // // Returns false if the attribute is a duplicate, returns true otherwise. fn bind_attr_qname( - &mut self, + &self, present_attrs: &mut HashSet<(Namespace, LocalName)>, name: &mut QualName, ) -> bool { @@ -331,7 +334,7 @@ where true } - fn process_namespaces(&mut self, tag: &mut Tag) { + fn process_namespaces(&self, tag: &mut Tag) { // List of already present namespace local name attribute pairs. let mut present_attrs: HashSet<(Namespace, LocalName)> = Default::default(); @@ -359,24 +362,27 @@ where self.bind_qname(&mut tag.name); // Finally, we dump current namespace if its unneeded. - let x = mem::replace(&mut self.current_namespace, NamespaceMap::empty()); + let x = mem::replace( + &mut *self.current_namespace.borrow_mut(), + NamespaceMap::empty(), + ); // Only start tag doesn't dump current namespace. However, <script /> is treated // differently than every other empty tag, so it needs to retain the current // namespace as well. 
if tag.kind == StartTag || (tag.kind == EmptyTag && tag.name.local == local_name!("script")) { - self.namespace_stack.push(x); + self.namespace_stack.borrow_mut().push(x); } } - fn process_to_completion(&mut self, mut token: Token) { + fn process_to_completion(&self, mut token: Token) { // Queue of additional tokens yet to be processed. // This stays empty in the common case where we don't split whitespace. let mut more_tokens = VecDeque::new(); loop { - let phase = self.phase; + let phase = self.phase.get(); #[allow(clippy::unused_unit)] match self.step(phase, token) { @@ -384,7 +390,7 @@ where token = unwrap_or_return!(more_tokens.pop_front(), ()); }, Reprocess(m, t) => { - self.phase = m; + self.phase.set(m); token = t; }, } @@ -397,7 +403,7 @@ where Handle: Clone, Sink: TreeSink<Handle = Handle>, { - fn process_token(&mut self, token: tokenizer::Token) { + fn process_token(&self, token: tokenizer::Token) { // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type. let token = match token { tokenizer::ParseError(e) => { @@ -417,13 +423,13 @@ where self.process_to_completion(token); } - fn end(&mut self) { - for node in self.open_elems.drain(..).rev() { + fn end(&self) { + for node in self.open_elems.borrow_mut().drain(..).rev() { self.sink.pop(&node); } } - fn query_state_change(&mut self) -> Option<tokenizer::states::XmlState> { + fn query_state_change(&self) -> Option<tokenizer::states::XmlState> { self.next_tokenizer_state.take() } } @@ -438,56 +444,60 @@ where Handle: Clone, Sink: TreeSink<Handle = Handle>, { - fn current_node(&self) -> &Handle { - self.open_elems.last().expect("no current element") + fn current_node(&self) -> Ref<Handle> { + Ref::map(self.open_elems.borrow(), |elems| { + elems.last().expect("no current element") + }) } - fn insert_appropriately(&mut self, child: NodeOrText<Handle>) { - let target = current_node(&self.open_elems); + fn insert_appropriately(&self, child: NodeOrText<Handle>) { + let open_elems = self.open_elems.borrow(); + let target = current_node(&open_elems); self.sink.append(target, child); } - fn insert_tag(&mut self, tag: Tag) -> XmlProcessResult { - let child = create_element(&mut self.sink, tag.name, tag.attrs); + fn insert_tag(&self, tag: Tag) -> XmlProcessResult { + let child = create_element(&self.sink, tag.name, tag.attrs); self.insert_appropriately(AppendNode(child.clone())); self.add_to_open_elems(child) } - fn append_tag(&mut self, tag: Tag) -> XmlProcessResult { - let child = create_element(&mut self.sink, tag.name, tag.attrs); + fn append_tag(&self, tag: Tag) -> XmlProcessResult { + let child = create_element(&self.sink, tag.name, tag.attrs); self.insert_appropriately(AppendNode(child.clone())); self.sink.pop(&child); Done } - fn append_tag_to_doc(&mut self, tag: Tag) -> Handle { - let child = create_element(&mut self.sink, tag.name, tag.attrs); + fn append_tag_to_doc(&self, tag: Tag) -> Handle { + let child = create_element(&self.sink, tag.name, tag.attrs); self.sink .append(&self.doc_handle, AppendNode(child.clone())); child } - fn add_to_open_elems(&mut self, el: Handle) -> XmlProcessResult { - self.open_elems.push(el); + fn add_to_open_elems(&self, el: Handle) -> XmlProcessResult { + self.open_elems.borrow_mut().push(el); Done } - fn append_comment_to_doc(&mut self, text: StrTendril) -> XmlProcessResult { + fn append_comment_to_doc(&self, text: StrTendril) -> XmlProcessResult { let comment = self.sink.create_comment(text); self.sink.append(&self.doc_handle, AppendNode(comment)); Done } - fn 
append_comment_to_tag(&mut self, text: StrTendril) -> XmlProcessResult { - let target = current_node(&self.open_elems); + fn append_comment_to_tag(&self, text: StrTendril) -> XmlProcessResult { + let open_elems = self.open_elems.borrow(); + let target = current_node(&open_elems); let comment = self.sink.create_comment(text); self.sink.append(target, AppendNode(comment)); Done } - fn append_doctype_to_doc(&mut self, doctype: Doctype) -> XmlProcessResult { + fn append_doctype_to_doc(&self, doctype: Doctype) -> XmlProcessResult { fn get_tendril(opt: Option<StrTendril>) -> StrTendril { match opt { Some(expr) => expr, @@ -502,33 +512,35 @@ where Done } - fn append_pi_to_doc(&mut self, pi: Pi) -> XmlProcessResult { + fn append_pi_to_doc(&self, pi: Pi) -> XmlProcessResult { let pi = self.sink.create_pi(pi.target, pi.data); self.sink.append(&self.doc_handle, AppendNode(pi)); Done } - fn append_pi_to_tag(&mut self, pi: Pi) -> XmlProcessResult { - let target = current_node(&self.open_elems); + fn append_pi_to_tag(&self, pi: Pi) -> XmlProcessResult { + let open_elems = self.open_elems.borrow(); + let target = current_node(&open_elems); let pi = self.sink.create_pi(pi.target, pi.data); self.sink.append(target, AppendNode(pi)); Done } - fn append_text(&mut self, chars: StrTendril) -> XmlProcessResult { + fn append_text(&self, chars: StrTendril) -> XmlProcessResult { self.insert_appropriately(AppendText(chars)); Done } fn tag_in_open_elems(&self, tag: &Tag) -> bool { self.open_elems + .borrow() .iter() .any(|a| self.sink.elem_name(a) == tag.name.expanded()) } // Pop elements until an element from the set has been popped. Returns the // number of elements popped. - fn pop_until<P>(&mut self, pred: P) + fn pop_until<P>(&self, pred: P) where P: Fn(ExpandedName) -> bool, { @@ -545,17 +557,17 @@ where TagSet: Fn(ExpandedName) -> bool, { // FIXME: take namespace into consideration: - set(self.sink.elem_name(self.current_node())) + set(self.sink.elem_name(&self.current_node())) } - fn close_tag(&mut self, tag: Tag) -> XmlProcessResult { + fn close_tag(&self, tag: Tag) -> XmlProcessResult { debug!( "Close tag: current_node.name {:?} \n Current tag {:?}", - self.sink.elem_name(self.current_node()), + self.sink.elem_name(&self.current_node()), &tag.name ); - if *self.sink.elem_name(self.current_node()).local != tag.name.local { + if *self.sink.elem_name(&self.current_node()).local != tag.name.local { self.sink .parse_error(Borrowed("Current node doesn't match tag")); } @@ -571,25 +583,30 @@ where } fn no_open_elems(&self) -> bool { - self.open_elems.is_empty() + self.open_elems.borrow().is_empty() } - fn pop(&mut self) -> Handle { - self.namespace_stack.pop(); - let node = self.open_elems.pop().expect("no current element"); + fn pop(&self) -> Handle { + self.namespace_stack.borrow_mut().pop(); + let node = self + .open_elems + .borrow_mut() + .pop() + .expect("no current element"); self.sink.pop(&node); node } - fn stop_parsing(&mut self) -> XmlProcessResult { + fn stop_parsing(&self) -> XmlProcessResult { warn!("stop_parsing for XML5 not implemented, full speed ahead!"); Done } - fn complete_script(&mut self) { - let current = current_node(&self.open_elems); + fn complete_script(&self) { + let open_elems = self.open_elems.borrow(); + let current = current_node(&open_elems); if self.sink.complete_script(current) == NextParserState::Suspend { - self.next_tokenizer_state = Some(Quiescent); + self.next_tokenizer_state.set(Some(Quiescent)); } } } @@ -605,7 +622,7 @@ where Handle: Clone, Sink: TreeSink<Handle = 
Handle>, { - fn step(&mut self, mode: XmlPhase, token: Token) -> XmlProcessResult { + fn step(&self, mode: XmlPhase, token: Token) -> XmlProcessResult { self.debug_step(mode, &token); match mode { @@ -624,7 +641,7 @@ where self.process_namespaces(&mut tag); tag }; - self.phase = Main; + self.phase.set(Main); let handle = self.append_tag_to_doc(tag); self.add_to_open_elems(handle) }, @@ -642,7 +659,7 @@ where self.process_namespaces(&mut tag); tag }; - self.phase = End; + self.phase.set(End); let handle = self.append_tag_to_doc(tag); self.sink.pop(&handle); Done @@ -724,14 +741,14 @@ where } let retval = self.close_tag(tag); if self.no_open_elems() { - self.phase = End; + self.phase.set(End); } retval }, Tag(Tag { kind: ShortTag, .. }) => { self.pop(); if self.no_open_elems() { - self.phase = End; + self.phase.set(End); } Done },
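The tree-builder hunks complete the same migration: state that used to be mutated through `&mut self` now lives in `Cell` (copyable state such as the phase) or `RefCell` (collections such as the open-element stack), and borrowed views are projected with `Ref::map` / `RefMut::map` instead of being returned as plain references. A small self-contained illustration of that shape, using made-up types rather than the real builder:

```rust
use std::cell::{Cell, Ref, RefCell, RefMut};

// Stand-in for the builder's state: Copy-style state in Cell,
// growable state in RefCell, so every method can take `&self`.
struct Builder {
    phase: Cell<u8>,
    open_elems: RefCell<Vec<String>>,
}

impl Builder {
    fn new() -> Self {
        Builder {
            phase: Cell::new(0),
            open_elems: RefCell::new(Vec::new()),
        }
    }

    fn push_elem(&self, name: &str) {
        self.open_elems.borrow_mut().push(name.to_owned());
        self.phase.set(self.phase.get() + 1);
    }

    // Borrow the last element, in the spirit of `current_node`
    // returning `Ref<Handle>` after the patch.
    fn current(&self) -> Ref<'_, String> {
        Ref::map(self.open_elems.borrow(), |v| {
            v.last().expect("no current element")
        })
    }

    // Project a mutable borrow, like `doctype_id` returning `RefMut`.
    fn last_mut(&self) -> RefMut<'_, String> {
        RefMut::map(self.open_elems.borrow_mut(), |v| {
            v.last_mut().expect("no current element")
        })
    }
}

fn main() {
    let b = Builder::new();
    b.push_elem("root");
    b.push_elem("child");
    assert_eq!(&*b.current(), "child");
    b.last_mut().push_str("-renamed");
    assert_eq!(&*b.current(), "child-renamed");
    assert_eq!(b.phase.get(), 2);
}
```

The trade-off is that aliasing is now checked at run time, which is why the hunks above bind each borrow to a short-lived local (as in `append_comment_to_tag`) rather than holding `RefCell` guards any longer than needed.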