@@ -41,6 +41,7 @@ pub mod attributes;
41
41
use encoding_rs:: Encoding ;
42
42
use std:: borrow:: Cow ;
43
43
use std:: fmt:: { self , Debug , Formatter } ;
44
+ use std:: iter:: FusedIterator ;
44
45
use std:: mem:: replace;
45
46
use std:: ops:: Deref ;
46
47
use std:: str:: from_utf8;
@@ -53,7 +54,7 @@ use crate::escape::{
53
54
use crate :: name:: { LocalName , QName } ;
54
55
#[ cfg( feature = "serialize" ) ]
55
56
use crate :: utils:: CowRef ;
56
- use crate :: utils:: { name_len, trim_xml_end, trim_xml_start, write_cow_string} ;
57
+ use crate :: utils:: { name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes } ;
57
58
use attributes:: { AttrError , Attribute , Attributes } ;
58
59
59
60
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
@@ -700,12 +701,53 @@ impl<'a> BytesCData<'a> {
700
701
///
701
702
/// # Warning
702
703
///
703
- /// `content` must not contain the `]]>` sequence.
704
+ /// `content` must not contain the `]]>` sequence. You can use
705
+ /// [`BytesCData::escaped`] to escape the content instead.
704
706
#[ inline]
705
707
pub fn new < C : Into < Cow < ' a , str > > > ( content : C ) -> Self {
706
708
Self :: wrap ( str_cow_to_bytes ( content) , Decoder :: utf8 ( ) )
707
709
}
708
710
711
+ /// Creates an iterator of `BytesCData` from a string.
712
+ ///
713
+ /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
714
+ /// sections, splitting the `]]` and `>` characters, because the CDATA closing
715
+ /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
716
+ /// for each of those sections.
717
+ ///
718
+ /// # Examples
719
+ ///
720
+ /// ```
721
+ /// # use quick_xml::events::BytesCData;
722
+ /// # use pretty_assertions::assert_eq;
723
+ /// let content = "";
724
+ /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
725
+ /// assert_eq!(cdata, &[BytesCData::new("")]);
726
+ ///
727
+ /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
728
+ /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
729
+ /// assert_eq!(cdata, &[
730
+ /// BytesCData::new("Certain tokens like ]]"),
731
+ /// BytesCData::new("> can be difficult and <invalid>"),
732
+ /// ]);
733
+ ///
734
+ /// let content = "foo]]>bar]]>baz]]>quux";
735
+ /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
736
+ /// assert_eq!(cdata, &[
737
+ /// BytesCData::new("foo]]"),
738
+ /// BytesCData::new(">bar]]"),
739
+ /// BytesCData::new(">baz]]"),
740
+ /// BytesCData::new(">quux"),
741
+ /// ]);
742
+ /// ```
743
+ #[ inline]
744
+ pub fn escaped ( content : & ' a str ) -> CDataIterator < ' a > {
745
+ CDataIterator {
746
+ unprocessed : content. as_bytes ( ) ,
747
+ finished : false ,
748
+ }
749
+ }
750
+
709
751
/// Ensures that all data is owned to extend the object's lifetime if
710
752
/// necessary.
711
753
#[ inline]
@@ -833,6 +875,49 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
833
875
}
834
876
}
835
877
878
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set once the final section has been handed out; from then on the
    /// iterator only returns `None`.
    finished: bool,
}
888
+
889
+ impl < ' a > Debug for CDataIterator < ' a > {
890
+ fn fmt ( & self , f : & mut Formatter ) -> fmt:: Result {
891
+ f. debug_struct ( "CDataIterator" )
892
+ . field ( "unprocessed" , & Bytes ( self . unprocessed ) )
893
+ . field ( "finished" , & self . finished )
894
+ . finish ( )
895
+ }
896
+ }
897
+
898
+ impl < ' a > Iterator for CDataIterator < ' a > {
899
+ type Item = BytesCData < ' a > ;
900
+
901
+ fn next ( & mut self ) -> Option < BytesCData < ' a > > {
902
+ if self . finished {
903
+ return None ;
904
+ }
905
+
906
+ for gt in memchr:: memchr_iter ( b'>' , self . unprocessed ) {
907
+ if self . unprocessed [ ..gt] . ends_with ( b"]]" ) {
908
+ let ( slice, rest) = self . unprocessed . split_at ( gt) ;
909
+ self . unprocessed = rest;
910
+ return Some ( BytesCData :: wrap ( slice, Decoder :: utf8 ( ) ) ) ;
911
+ }
912
+ }
913
+
914
+ self . finished = true ;
915
+ Some ( BytesCData :: wrap ( self . unprocessed , Decoder :: utf8 ( ) ) )
916
+ }
917
+ }
918
+
919
+ impl FusedIterator for CDataIterator < ' _ > { }
920
+
836
921
////////////////////////////////////////////////////////////////////////////////////////////////////
837
922
838
923
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
0 commit comments