Skip to content

Commit

Permalink
Support decoding nested arrays and arrays of multiline strings
Browse files Browse the repository at this point in the history
  • Loading branch information
BOOMeranGG committed Mar 6, 2025
1 parent 0ee1a46 commit 84a732b
Show file tree
Hide file tree
Showing 14 changed files with 537 additions and 178 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ We are still developing and testing this library, so it has several limitations:
:white_check_mark: Local Date (to `LocalDate` of [kotlinx-datetime](https://github.com/Kotlin/kotlinx-datetime)) \
:white_check_mark: Local Time (to `LocalTime` of [kotlinx-datetime](https://github.com/Kotlin/kotlinx-datetime)) \
:white_check_mark: Multiline Strings \
:white_check_mark: Arrays (including multiline arrays) \
:white_check_mark: Arrays (including multiline and nested arrays) \
:white_check_mark: Maps (for anonymous key-value pairs) \
:x: Arrays: nested; of Different Types \
:x: Arrays: of Different Types \
:x: Nested Inline Tables \
:x: Array of Tables \
:x: Inline Array of Tables
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import com.akuleshov7.ktoml.TomlInputConfig
import com.akuleshov7.ktoml.tree.nodes.TomlKeyValue
import com.akuleshov7.ktoml.tree.nodes.TomlKeyValueArray
import com.akuleshov7.ktoml.tree.nodes.TomlKeyValuePrimitive
import com.akuleshov7.ktoml.tree.nodes.pairs.values.TomlArray
import com.akuleshov7.ktoml.tree.nodes.pairs.values.TomlNull
import com.akuleshov7.ktoml.tree.nodes.pairs.values.TomlValue
import kotlinx.serialization.DeserializationStrategy
Expand All @@ -26,7 +27,7 @@ public class TomlArrayDecoder(
private var nextElementIndex = 0
private val list = rootNode.value.content as List<TomlValue>
override val serializersModule: SerializersModule = EmptySerializersModule()
private lateinit var currentElementDecoder: TomlPrimitiveDecoder
private lateinit var currentElementDecoder: TomlAbstractDecoder
private lateinit var currentPrimitiveElementOfArray: TomlValue

private fun haveStartedReadingElements() = nextElementIndex > 0
Expand All @@ -40,16 +41,29 @@ public class TomlArrayDecoder(

currentPrimitiveElementOfArray = list[nextElementIndex]

currentElementDecoder = TomlPrimitiveDecoder(
// a small hack that creates a PrimitiveKeyValue node that is used in the decoder
TomlKeyValuePrimitive(
rootNode.key,
currentPrimitiveElementOfArray,
rootNode.lineNo,
comments = emptyList(),
inlineComment = "",
currentElementDecoder = if (currentPrimitiveElementOfArray is TomlArray) {
TomlArrayDecoder(
TomlKeyValueArray(
rootNode.key,
currentPrimitiveElementOfArray,
rootNode.lineNo,
comments = emptyList(),
inlineComment = "",
),
config
)
)
} else {
TomlPrimitiveDecoder(
// a small hack that creates a PrimitiveKeyValue node that is used in the decoder
TomlKeyValuePrimitive(
rootNode.key,
currentPrimitiveElementOfArray,
rootNode.lineNo,
comments = emptyList(),
inlineComment = "",
)
)
}
return nextElementIndex++
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,16 +143,16 @@ internal fun String.trimDoubleBrackets(): String = trimSymbols(this, "[[", "]]")
* @param allowEscapedQuotesInLiteralStrings value from TomlInputConfig
* @return The text before a comment, i.e.
* ```kotlin
* "a = 0 # Comment".takeBeforeComment() == "a = 0"
* "a = 0 # Comment".takeBeforeComment() == "a = 0 "
* ```
*/
internal fun String.takeBeforeComment(allowEscapedQuotesInLiteralStrings: Boolean): String {
val commentStartIndex = getCommentStartIndex(allowEscapedQuotesInLiteralStrings)

return if (commentStartIndex == -1) {
this.trim()
this
} else {
this.substring(0, commentStartIndex).trim()
this.substring(0, commentStartIndex)
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
package com.akuleshov7.ktoml.parsers

import com.akuleshov7.ktoml.TomlInputConfig
import com.akuleshov7.ktoml.exceptions.ParseException
import com.akuleshov7.ktoml.parsers.enums.MultilineType
import com.akuleshov7.ktoml.utils.LinesIteratorWrapper
import com.akuleshov7.ktoml.utils.newLineChar

/**
 * Collects the physical lines of a multiline TOML value (a multiline array, or a multiline
 * basic/literal string) starting from the line where the value was detected.
 *
 * All remaining lines of the value are consumed from [linesIteratorWrapper] eagerly, at construction time.
 *
 * @param config
 * @param linesIteratorWrapper - iterator with the rest of the toml data
 * @param firstLine - first line of multiline value where it was detected
 * @throws ParseException if [firstLine] does not start a multiline value, or if the input ends
 * before the closing symbols of the value are found
 */
internal class TomlMultilineString(
    private val config: TomlInputConfig,
    private val linesIteratorWrapper: LinesIteratorWrapper<String>,
    firstLine: String,
) {
    // NOTE: the property order matters - all of these are read by the init block below
    private val comments: MutableList<String> = mutableListOf()
    private val lines: MutableList<String> = mutableListOf()
    private val startLineNo = linesIteratorWrapper.lineNo
    private val multilineType = getMultilineType(firstLine, config)

    // State of the triple-quote tracker, it is used only when the value itself is an array
    private var isInMultilineBasic = false
    private var isInMultilineLiteral = false

    // If isNested is null, we don't know yet if the type is nested
    private var isNested = if (multilineType.isNestedSupported) null else false

    init {
        if (multilineType == MultilineType.NOT_A_MULTILINE) {
            throw ParseException("Internal parse exception", startLineNo)
        }
        trackMultilineString(firstLine)
        lines.add(firstLine.takeBeforeComment(config.allowEscapedQuotesInLiteralStrings))
        parseMultiline()
    }

    /**
     * @return all collected lines joined with the new line character.
     * Comments were already cut off when each line was stored, and lines that belong to a multiline
     * string element of an array were stored untouched on purpose. That is why the lines are NOT passed
     * through `takeBeforeComment` again: it would cut everything after '#' inside such string elements.
     */
    fun getLine(): String = lines.joinToString(newLineChar().toString())

    /**
     * @return the list filled with `trimComment` results, one entry per consumed line that was
     * outside of any multiline string (the first line is not included)
     */
    fun getComments(): List<String> = comments

    private fun parseMultiline() {
        var hasFoundEnd = false

        while (linesIteratorWrapper.hasNext()) {
            val line = linesIteratorWrapper.next()
            // the tracker must be updated first: the checks below rely on the state AFTER this line
            trackMultilineString(line)

            if (multilineType !in stringTypes && !isInMultilineString()) {
                comments.add(line.trimComment(config.allowEscapedQuotesInLiteralStrings))
                lines.add(line.takeBeforeComment(config.allowEscapedQuotesInLiteralStrings))
            } else {
                // Either the whole value is a multiline basic/literal string, or we are inside
                // a multiline string element of an array: in both cases there are no comments
                lines.add(line)
            }

            if (!isInMultilineString() && isEndOfMultilineValue(multilineType)) {
                hasFoundEnd = true
                break
            }
        }

        if (!hasFoundEnd) {
            throw ParseException(
                "Expected (${multilineType.closingSymbols}) in the end of ${multilineType.name}",
                startLineNo,
            )
        }
    }

    /**
     * When we have an array with multiline strings, and we're parsing line X
     * we want to know if multiline string was open before line X.
     *
     * A matched delimiter is skipped as a whole. Otherwise overlapping matches in a run of four
     * or more quotes (for example `"""a""""`) would flip the state several times.
     */
    private fun trackMultilineString(line: String) {
        if (multilineType in stringTypes) {
            return
        }
        var index = 0
        while (index <= line.length - TRIPLE_QUOTE_LENGTH) {
            // Stumbled upon a comment, no need to analyze for the rest of the line
            if (!isInMultilineString() && line[index] == '#') {
                break
            }

            val isBasicDelimiter = !isInMultilineLiteral && isNextThreeQuotes(line, index, '"')
            val isLiteralDelimiter = !isBasicDelimiter && !isInMultilineBasic && isNextThreeQuotes(line, index, '\'')

            when {
                isBasicDelimiter -> isInMultilineBasic = !isInMultilineBasic
                isLiteralDelimiter -> isInMultilineLiteral = !isInMultilineLiteral
                else -> {
                    // not a delimiter, nothing to toggle
                }
            }

            index += if (isBasicDelimiter || isLiteralDelimiter) TRIPLE_QUOTE_LENGTH else 1
        }
    }

    /**
     * The caller must guarantee that `index + 2` is a valid index of [line]
     */
    private fun isNextThreeQuotes(
        line: String,
        index: Int,
        quote: Char
    ): Boolean = line[index] == quote && line[index + 1] == quote && line[index + 2] == quote

    private fun isInMultilineString(): Boolean = isInMultilineBasic || isInMultilineLiteral

    /**
     * @return true if string is a last line of multiline value declaration
     */
    private fun isEndOfMultilineValue(multilineType: MultilineType): Boolean {
        if (multilineType == MultilineType.NOT_A_MULTILINE) {
            throw ParseException("Internal parse exception", startLineNo)
        }
        // Nesting is re-evaluated on every call until it is resolved to true or false
        if (isNested == null) {
            isNested = hasTwoConsecutiveSymbolsIgnoreWhitespaces(getLine(), multilineType.openSymbols[0])
        }

        return if (isNested == true) {
            // a nested value is finished only by doubled closing symbols; lines are glued without
            // a separator, so the two symbols can be located on different lines
            lines.joinToString("")
                .trim()
                .endsWith(multilineType.closingSymbols + multilineType.closingSymbols)
        } else {
            lines.last()
                .trim()
                .endsWith(multilineType.closingSymbols)
        }
    }

    /**
     * @return true if the first two occurrences of [searchSymbol] are separated only by whitespaces;
     * false if there is no [searchSymbol] or the first one is followed by other content;
     * null if only whitespaces follow the single [searchSymbol], so it is not possible to decide yet
     */
    private fun hasTwoConsecutiveSymbolsIgnoreWhitespaces(value: String, searchSymbol: Char): Boolean? {
        val firstIndex = value.indexOf(searchSymbol)
        if (firstIndex == -1) {
            return false
        }

        val nextIndex = value.indexOf(searchSymbol, firstIndex + 1)
        if (nextIndex != -1) {
            return value.substring(firstIndex + 1, nextIndex).all { it.isWhitespace() }
        }

        val isRestHasOnlyWhitespaces = value.substring(firstIndex + 1).all { it.isWhitespace() }
        return if (isRestHasOnlyWhitespaces) null else false
    }

    companion object {
        private const val TRIPLE_QUOTE_LENGTH = 3
        private val stringTypes = listOf(MultilineType.BASIC_STRING, MultilineType.LITERAL_STRING)

        /**
         * Important! We treat a multi-line that is declared in one line ("""abc""") as a regular not multiline string
         *
         * @param line
         * @param config
         * @return MultilineType
         */
        fun getMultilineType(line: String, config: TomlInputConfig): MultilineType {
            val lineWithoutComment = line.takeBeforeComment(config.allowEscapedQuotesInLiteralStrings)
            val firstEqualsSign = lineWithoutComment.indexOfFirst { it == '=' }
            if (firstEqualsSign == -1) {
                return MultilineType.NOT_A_MULTILINE
            }
            val value = lineWithoutComment.substring(firstEqualsSign + 1).trim()

            // An array that is opened, but is not closed on the same line
            if (value.startsWith(MultilineType.ARRAY.openSymbols) &&
                    !value.endsWith(MultilineType.ARRAY.closingSymbols)
            ) {
                return MultilineType.ARRAY
            }

            // If we have more than 1 combination of (""") - it means that
            // multi-line is declared in one line, and we can handle it as not a multi-line
            if (value.startsWith(MultilineType.BASIC_STRING.openSymbols) &&
                    value.getCountOfOccurrencesOfSubstring(MultilineType.BASIC_STRING.openSymbols) == 1
            ) {
                return MultilineType.BASIC_STRING
            }
            if (value.startsWith(MultilineType.LITERAL_STRING.openSymbols) &&
                    value.getCountOfOccurrencesOfSubstring(MultilineType.LITERAL_STRING.openSymbols) == 1
            ) {
                return MultilineType.LITERAL_STRING
            }

            return MultilineType.NOT_A_MULTILINE
        }
    }
}
Loading

0 comments on commit 84a732b

Please sign in to comment.