diff --git a/doc/specs/stdlib_strings.md b/doc/specs/stdlib_strings.md index c047653b5..19bbf4a59 100644 --- a/doc/specs/stdlib_strings.md +++ b/doc/specs/stdlib_strings.md @@ -221,7 +221,7 @@ and remains active until `last` index is crossed. #### Syntax -`string = [[stdlib_strings(module):slice(interface)]] (string, first, last, stride)` +`string = [[stdlib_strings(module):slice(interface)]] (string [, first, last, stride])` #### Status @@ -233,13 +233,13 @@ Pure function. #### Argument -- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]] +- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. This argument is intent(in). -- `first`: integer +- `first`: integer. This argument is intent(in) and optional. -- `last`: integer +- `last`: integer. This argument is intent(in) and optional. -- `stride`: integer +- `stride`: integer. This argument is intent(in) and optional. #### Result value @@ -270,3 +270,61 @@ program demo_slice end program demo_slice ``` + + + +### `find` + +#### Description + +Returns the starting index of the `occurrence`th occurrence of the substring `pattern` +in the input string `string`. +Default value of `occurrence` is set to `1`. +If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring as two different occurrences. +If `occurrence`th occurrence is not found, function returns `0`. + + +#### Syntax + +`string = [[stdlib_strings(module):find(interface)]] (string, pattern [, occurrence, consider_overlapping])` + +#### Status + +Experimental + +#### Class + +Elemental function + +#### Argument + +- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. + This argument is intent(in). +- `pattern`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. + This argument is intent(in). +- `occurrence`: integer. + This argument is intent(in) and optional. +- `consider_overlapping`: logical. + This argument is intent(in) and optional. + +#### Result value + +The result is a scalar of integer type or integer array of rank equal to the highest rank among all dummy arguments. + +#### Example + +```fortran +program demo_find + use stdlib_string_type, only: string_type, assignment(=) + use stdlib_strings, only : find + implicit none + string_type :: string + + string = "needle in the character-stack" + + print *, find(string, "needle") ! 1 + print *, find(string, ["a", "c"], [3, 2]) ! [27, 20] + print *, find("qwqwqwq", "qwq", 3, [.false., .true.]) ! [0, 5] + +end program demo_find +``` diff --git a/src/Makefile.manual b/src/Makefile.manual index 28d00a6cc..06a99a472 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -126,7 +126,8 @@ stdlib_stats_distribution_PRNG.o: \ stdlib_kinds.o \ stdlib_error.o stdlib_string_type.o: stdlib_ascii.o \ - stdlib_kinds.o + stdlib_kinds.o stdlib_strings.o: stdlib_ascii.o \ - stdlib_string_type.o + stdlib_string_type.o \ + stdlib_optval.o stdlib_math.o: stdlib_kinds.o diff --git a/src/stdlib_strings.f90 b/src/stdlib_strings.f90 index 158b06588..89d5ba020 100644 --- a/src/stdlib_strings.f90 +++ b/src/stdlib_strings.f90 @@ -4,14 +4,15 @@ !> !> The specification of this module is available [here](../page/specs/stdlib_strings.html). module stdlib_strings - use stdlib_ascii, only : whitespace - use stdlib_string_type, only : string_type, char, verify + use stdlib_ascii, only: whitespace + use stdlib_string_type, only: string_type, char, verify + use stdlib_optval, only: optval implicit none private public :: strip, chomp public :: starts_with, ends_with - public :: slice + public :: slice, find !> Remove leading and trailing whitespace characters. @@ -67,6 +68,16 @@ module stdlib_strings module procedure :: slice_char end interface slice + !> Finds the starting index of substring 'pattern' in the input 'string' + !> [Specifications](link to the specs - to be completed) + !> + !> Version: experimental + interface find + module procedure :: find_string_string + module procedure :: find_string_char + module procedure :: find_char_string + module procedure :: find_char_char + end interface find contains @@ -366,5 +377,127 @@ pure function slice_char(string, first, last, stride) result(sliced_string) end do end function slice_char + !> Returns the starting index of the 'occurrence'th occurrence of substring 'pattern' + !> in input 'string' + !> Returns an integer + elemental function find_string_string(string, pattern, occurrence, consider_overlapping) result(res) + type(string_type), intent(in) :: string + type(string_type), intent(in) :: pattern + integer, intent(in), optional :: occurrence + logical, intent(in), optional :: consider_overlapping + integer :: res + + res = find(char(string), char(pattern), occurrence, consider_overlapping) + + end function find_string_string + + !> Returns the starting index of the 'occurrence'th occurrence of substring 'pattern' + !> in input 'string' + !> Returns an integer + elemental function find_string_char(string, pattern, occurrence, consider_overlapping) result(res) + type(string_type), intent(in) :: string + character(len=*), intent(in) :: pattern + integer, intent(in), optional :: occurrence + logical, intent(in), optional :: consider_overlapping + integer :: res + + res = find(char(string), pattern, occurrence, consider_overlapping) + + end function find_string_char + + !> Returns the starting index of the 'occurrence'th occurrence of substring 'pattern' + !> in input 'string' + !> Returns an integer + elemental function find_char_string(string, pattern, occurrence, consider_overlapping) result(res) + character(len=*), intent(in) :: string + type(string_type), intent(in) :: pattern + integer, intent(in), optional :: occurrence + logical, intent(in), optional :: consider_overlapping + integer :: res + + res = find(string, char(pattern), occurrence, consider_overlapping) + + end function find_char_string + + !> Returns the starting index of the 'occurrence'th occurrence of substring 'pattern' + !> in input 'string' + !> Returns an integer + elemental function find_char_char(string, pattern, occurrence, consider_overlapping) result(res) + character(len=*), intent(in) :: string + character(len=*), intent(in) :: pattern + integer, intent(in), optional :: occurrence + logical, intent(in), optional :: consider_overlapping + integer :: lps_array(len(pattern)) + integer :: res, s_i, p_i, length_string, length_pattern, occurrence_ + logical :: consider_overlapping_ + + consider_overlapping_ = optval(consider_overlapping, .true.) + occurrence_ = optval(occurrence, 1) + res = 0 + length_string = len(string) + length_pattern = len(pattern) + + if (length_pattern > 0 .and. length_pattern <= length_string & + & .and. occurrence_ > 0) then + lps_array = compute_lps(pattern) + + s_i = 1 + p_i = 1 + do while(s_i <= length_string) + if (string(s_i:s_i) == pattern(p_i:p_i)) then + if (p_i == length_pattern) then + occurrence_ = occurrence_ - 1 + if (occurrence_ == 0) then + res = s_i - length_pattern + 1 + exit + else if (consider_overlapping_) then + p_i = lps_array(p_i) + else + p_i = 0 + end if + end if + s_i = s_i + 1 + p_i = p_i + 1 + else if (p_i > 1) then + p_i = lps_array(p_i - 1) + 1 + else + s_i = s_i + 1 + end if + end do + end if + + end function find_char_char + + !> Computes longest prefix suffix for each index of the input 'string' + !> + !> Returns an array of integers + pure function compute_lps(string) result(lps_array) + character(len=*), intent(in) :: string + integer :: lps_array(len(string)) + integer :: i, j, length_string + + length_string = len(string) + + if (length_string > 0) then + lps_array(1) = 0 + + i = 2 + j = 1 + do while (i <= length_string) + if (string(j:j) == string(i:i)) then + lps_array(i) = j + i = i + 1 + j = j + 1 + else if (j > 1) then + j = lps_array(j - 1) + 1 + else + lps_array(i) = 0 + i = i + 1 + end if + end do + end if + + end function compute_lps + end module stdlib_strings diff --git a/src/tests/string/test_string_functions.f90 b/src/tests/string/test_string_functions.f90 index e72b4c162..194c9f1bb 100644 --- a/src/tests/string/test_string_functions.f90 +++ b/src/tests/string/test_string_functions.f90 @@ -4,7 +4,7 @@ module test_string_functions use stdlib_error, only : check use stdlib_string_type, only : string_type, assignment(=), operator(==), & to_lower, to_upper, to_title, to_sentence, reverse - use stdlib_strings, only: slice + use stdlib_strings, only: slice, find use stdlib_optval, only: optval use stdlib_ascii, only : to_string implicit none @@ -162,6 +162,38 @@ subroutine test_slice_string end subroutine test_slice_string + subroutine test_find + type(string_type) :: test_string_1, test_string_2, test_pattern_1, test_pattern_2 + test_string_1 = "qwqwqwqwqwqwqw" + test_string_2 = "abccbabccbabc" + test_pattern_1 = "qwq" + test_pattern_2 = "abccbabc" + + call check(all(find([test_string_1, test_string_2], test_pattern_1, 4) == [7, 0]), & + & 'Find: [test_string_1, test_string_2], test_pattern_1, 4') + call check(all(find(test_string_1, [test_pattern_1, test_pattern_2], 3, .false.) == [9, 0]), & + & 'Find: test_string_1, [test_pattern_1, test_pattern_2], 3, .false.') + call check(find(test_string_1, test_pattern_1, 7) == 0, & + & 'Find: test_string_1, test_pattern_1, 7') + call check(all(find([test_string_1, test_string_2, test_string_2], [test_pattern_1, & + & test_pattern_2, test_pattern_2], [7, 2, 2], [.true., .false., .true.]) == [0, 0, 6]), & + & 'Find: [test_string_1, test_string_2, test_string_2], [test_pattern_1, & + & test_pattern_2, test_pattern_2], [7, 2, 2], [.true., .false., .true.]') + call check(find("qwqwqwqwqwqwqw", test_pattern_1) == 1, & + & 'Find: "qwqwqwqwqwqwqw", test_pattern_1') + call check(all(find(test_string_1, ["qwq", "wqw"], 2) == [3, 4]), & + & 'Find: test_string_1, ["qwq", "wqw"], 2') + call check(find("qwqwqwqwqwqwqw", "qwq", 2, .false.) == 5, & + & 'Find: "qwqwqwqwqwqwqw", "qwq", 2, .false.') + call check(find("", "") == 0, & + & 'Find: "", ""') + call check(find("", test_pattern_1) == 0, & + & 'Find: "", test_pattern_1') + call check(find(test_string_1, "") == 0, & + & 'Find: test_string_1, ""') + + end subroutine test_find + subroutine test_slice_gen character(len=*), parameter :: test = & & "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" @@ -300,5 +332,6 @@ program tester call test_reverse_string call test_slice_string call test_slice_gen + call test_find end program tester