Skip to content

Commit be09876

Browse files
Turbo87 authored and Mingun committed
Add BytesCData::escaped() fn
1 parent a5ad85e commit be09876

File tree

2 files changed

+91
-2
lines changed

2 files changed

+91
-2
lines changed

Changelog.md

+4
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,14 @@
1515

1616
### New Features
1717

18+
- [#831]: Add `BytesCData::escaped()` fn to construct CDATA events from arbitrary user input.
19+
1820
### Bug Fixes
1921

2022
### Misc Changes
2123

24+
[#831]: https://github.com/tafia/quick-xml/issues/831
25+
2226

2327
## 0.37.0 -- 2024-10-27
2428

src/events/mod.rs

+87-2
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ pub mod attributes;
4141
use encoding_rs::Encoding;
4242
use std::borrow::Cow;
4343
use std::fmt::{self, Debug, Formatter};
44+
use std::iter::FusedIterator;
4445
use std::mem::replace;
4546
use std::ops::Deref;
4647
use std::str::from_utf8;
@@ -53,7 +54,7 @@ use crate::escape::{
5354
use crate::name::{LocalName, QName};
5455
#[cfg(feature = "serialize")]
5556
use crate::utils::CowRef;
56-
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string};
57+
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
5758
use attributes::{AttrError, Attribute, Attributes};
5859

5960
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
@@ -700,12 +701,53 @@ impl<'a> BytesCData<'a> {
700701
///
701702
/// # Warning
702703
///
703-
/// `content` must not contain the `]]>` sequence.
704+
/// `content` must not contain the `]]>` sequence. You can use
705+
/// [`BytesCData::escaped`] to escape the content instead.
704706
#[inline]
705707
pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
706708
Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
707709
}
708710

711+
/// Creates an iterator that splits a string into one or more `BytesCData` events.
712+
///
713+
/// A `CDATA` section is terminated by `]]>` and that closing sequence cannot
714+
/// be escaped, so a string containing `]]>` cannot be stored in one section.
715+
/// Such a string is cut between `]]` and `>`: one section ends with `]]` and
716+
/// the next one starts with `>`. This iterator yields a `BytesCData` instance
/// for each of those sections, and always yields at least one of them, even
/// for an empty string.
717+
///
718+
/// # Examples
719+
///
720+
/// ```
721+
/// # use quick_xml::events::BytesCData;
722+
/// # use pretty_assertions::assert_eq;
/// // An empty string still produces a single, empty section.
723+
/// let content = "";
724+
/// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
725+
/// assert_eq!(cdata, &[BytesCData::new("")]);
726+
///
/// // One `]]>` produces two sections; other markup characters are untouched.
727+
/// let content = "Certain tokens like ]]> can be difficult and <invalid>";
728+
/// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
729+
/// assert_eq!(cdata, &[
730+
/// BytesCData::new("Certain tokens like ]]"),
731+
/// BytesCData::new("> can be difficult and <invalid>"),
732+
/// ]);
733+
///
/// // Every occurrence of `]]>` adds one more section.
734+
/// let content = "foo]]>bar]]>baz]]>quux";
735+
/// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
736+
/// assert_eq!(cdata, &[
737+
/// BytesCData::new("foo]]"),
738+
/// BytesCData::new(">bar]]"),
739+
/// BytesCData::new(">baz]]"),
740+
/// BytesCData::new(">quux"),
741+
/// ]);
742+
/// ```
743+
#[inline]
744+
pub fn escaped(content: &'a str) -> CDataIterator<'a> {
// No work is done here: the iterator borrows the bytes of `content` and
// searches for `]]>` lazily, one section per call to `next`.
745+
CDataIterator {
746+
unprocessed: content.as_bytes(),
747+
finished: false,
748+
}
749+
}
750+
709751
/// Ensures that all data is owned to extend the object's lifetime if
710752
/// necessary.
711753
#[inline]
@@ -833,6 +875,49 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
833875
}
834876
}
835877

878+
/// Iterator over the `CDATA` sections that a string has to be split into.
879+
///
880+
/// This iterator is created by the [`BytesCData::escaped`] method.
881+
#[derive(Clone)]
882+
pub struct CDataIterator<'a> {
883+
/// The unprocessed data which should be emitted as `BytesCData` events.
884+
/// At each iteration, the processed data is cut from this slice.
/// The slice is not emptied when the last section is returned; the
/// `finished` flag marks the end of iteration instead.
885+
unprocessed: &'a [u8],
/// Set when the last section has been returned. Once it is `true`,
/// `next` returns `None` without looking at `unprocessed`.
886+
finished: bool,
887+
}
888+
889+
// `Debug` is written by hand instead of derived so that `unprocessed` can be
// passed through the crate's `Bytes` wrapper before formatting.
// NOTE(review): `Bytes` is imported from `crate::utils` and is presumably there
// to print the byte slice in a readable form - confirm against its definition.
impl<'a> Debug for CDataIterator<'a> {
890+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
891+
f.debug_struct("CDataIterator")
892+
.field("unprocessed", &Bytes(self.unprocessed))
893+
.field("finished", &self.finished)
894+
.finish()
895+
}
896+
}
897+
898+
impl<'a> Iterator for CDataIterator<'a> {
899+
type Item = BytesCData<'a>;
900+
901+
/// Returns the next `CDATA` section, or `None` once the final section has
/// already been returned.
fn next(&mut self) -> Option<BytesCData<'a>> {
902+
if self.finished {
903+
return None;
904+
}
905+
906+
// Visit every `>` in the remaining data and stop at the first one that
// is directly preceded by `]]`, i.e. at the first `]]>`.
for gt in memchr::memchr_iter(b'>', self.unprocessed) {
907+
if self.unprocessed[..gt].ends_with(b"]]") {
908+
// Split right before the `>`: the returned section ends with `]]`
// and the `>` becomes the first byte of the remaining data. At
// index 0 nothing precedes it, so it cannot match again on the
// next call.
let (slice, rest) = self.unprocessed.split_at(gt);
909+
self.unprocessed = rest;
910+
return Some(BytesCData::wrap(slice, Decoder::utf8()));
911+
}
912+
}
913+
914+
// No `]]>` is left: everything that remains (possibly nothing at all)
// forms the last section, and later calls return `None`.
self.finished = true;
915+
Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
916+
}
917+
}
918+
919+
// Marker only: `next` sets `finished` before returning the last section and
// returns `None` on every call after that, so the iterator never resumes.
impl FusedIterator for CDataIterator<'_> {}
920+
836921
////////////////////////////////////////////////////////////////////////////////////////////////////
837922

838923
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.

0 commit comments

Comments
 (0)