diff --git a/Sources/_StringProcessing/Engine/MEBuiltins.swift b/Sources/_StringProcessing/Engine/MEBuiltins.swift index b50d1c213..d8c8c347b 100644 --- a/Sources/_StringProcessing/Engine/MEBuiltins.swift +++ b/Sources/_StringProcessing/Engine/MEBuiltins.swift @@ -63,10 +63,10 @@ extension Processor { switch payload.semanticLevel { case .graphemeCluster: return input.index(after: currentPosition) == subjectBounds.upperBound - && input[currentPosition].isNewline + && input[currentPosition].isNewline case .unicodeScalar: return input.unicodeScalars.index(after: currentPosition) == subjectBounds.upperBound - && input.unicodeScalars[currentPosition].isNewline + && input.unicodeScalars[currentPosition].isNewline } case .endOfSubject: return currentPosition == subjectBounds.upperBound @@ -121,6 +121,7 @@ extension Processor { // MARK: Matching `.` extension String { + // TODO: Should the below have a `limitedBy` parameter? func _matchAnyNonNewline( at currentPosition: String.Index, @@ -155,11 +156,11 @@ extension String { return .unknown } switch asciiValue { - case ._lineFeed, ._carriageReturn: - return .definite(nil) - default: - assert(!isCRLF) - return .definite(next) + case (._lineFeed)...(._carriageReturn): + return .definite(nil) + default: + assert(!isCRLF) + return .definite(next) } } @@ -183,6 +184,7 @@ extension String { // MARK: - Built-in character class matching extension String { + // TODO: Should the below have a `limitedBy` parameter? // Mentioned in ProgrammersManual.md, update docs if redesigned func _matchBuiltinCC( diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 81b80b00e..873627567 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -1,31 +1,37 @@ extension Processor { func _doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? { - var next: Input.Index? + // FIXME: is the below updated for scalar semantics? switch payload.type { case .bitset: - next = input.matchBitset( + return input.matchBitset( registers[payload.bitset], at: currentPosition, limitedBy: end) case .asciiChar: - next = input.matchScalar( + return input.matchScalar( UnicodeScalar.init(_value: UInt32(payload.asciiChar)), at: currentPosition, limitedBy: end, boundaryCheck: true) case .builtin: + // FIXME: bounds check? endIndex or end? + // We only emit .quantify if it consumes a single character - next = input._matchBuiltinCC( + return input._matchBuiltinCC( payload.builtin, at: currentPosition, isInverted: payload.builtinIsInverted, isStrictASCII: payload.builtinIsStrict, isScalarSemantics: false) case .any: - // TODO: call out to existing code with quick check - let matched = currentPosition != input.endIndex - && (!input[currentPosition].isNewline || payload.anyMatchesNewline) - next = matched ? input.index(after: currentPosition) : nil + // FIXME: endIndex or end? + guard currentPosition < input.endIndex else { return nil } + + if payload.anyMatchesNewline { + return input.index(after: currentPosition) + } + + return input._matchAnyNonNewline( + at: currentPosition, isScalarSemantics: false) } - return next } /// Generic quantify instruction interpreter diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index e8e41a114..a6c9babbe 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -1891,6 +1891,11 @@ extension RegexTests { func testSingleLineMode() { firstMatchTest(#".+"#, input: "a\nb", match: "a") firstMatchTest(#"(?s:.+)"#, input: "a\nb", match: "a\nb") + + // We recognize LF, line tab, FF, and CR as newlines by default + firstMatchTest(#"."#, input: "\u{A}\u{B}\u{C}\u{D}\nb", match: "b") + firstMatchTest(#".+"#, input: "\u{A}\u{B}\u{C}\u{D}\nbb", match: "bb") + } func testMatchNewlines() {