-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ASan][libc++] Turn on ASan annotations for short strings #79536
Merged
AdvenamTacet
merged 2 commits into
llvm:main
from
trail-of-forks:short-string-annotations-v3
May 7, 2024
+471
−41
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
182 changes: 182 additions & 0 deletions
182
libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
// REQUIRES: asan | ||
// UNSUPPORTED: c++03 | ||
|
||
#include <cassert> | ||
#include <string> | ||
#include <array> | ||
#include <deque> | ||
#include "test_macros.h" | ||
#include "asan_testing.h" | ||
#include "min_allocator.h" | ||
|
||
// This tests exists to check if strings work well with deque, as those | ||
// may be partialy annotated, we cannot simply call | ||
// is_double_ended_contiguous_container_asan_correct, as it assumes that | ||
// object memory inside is not annotated, so we check everything in a more careful way. | ||
|
||
template <typename D> | ||
void verify_inside(D const& d) { | ||
for (size_t i = 0; i < d.size(); ++i) { | ||
assert(is_string_asan_correct(d[i])); | ||
} | ||
} | ||
|
||
template <typename S, size_t N> | ||
S get_s(char c) { | ||
S s; | ||
for (size_t i = 0; i < N; ++i) | ||
s.push_back(c); | ||
|
||
return s; | ||
} | ||
|
||
template <class C, class S> | ||
void test_string() { | ||
size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16; | ||
|
||
{ | ||
C d1a(1), d1b(N), d1c(N + 1), d1d(5 * N); | ||
verify_inside(d1a); | ||
verify_inside(d1b); | ||
verify_inside(d1c); | ||
verify_inside(d1d); | ||
} | ||
{ | ||
C d2; | ||
for (size_t i = 0; i < 3 * N + 2; ++i) { | ||
d2.push_back(get_s<S, 1>(i % 10 + 'a')); | ||
verify_inside(d2); | ||
d2.push_back(get_s<S, 22>(i % 10 + 'b')); | ||
verify_inside(d2); | ||
|
||
d2.pop_front(); | ||
verify_inside(d2); | ||
} | ||
} | ||
{ | ||
C d3; | ||
for (size_t i = 0; i < 3 * N + 2; ++i) { | ||
d3.push_front(get_s<S, 1>(i % 10 + 'a')); | ||
verify_inside(d3); | ||
d3.push_front(get_s<S, 28>(i % 10 + 'b')); | ||
verify_inside(d3); | ||
|
||
d3.pop_back(); | ||
verify_inside(d3); | ||
} | ||
} | ||
{ | ||
C d4; | ||
for (size_t i = 0; i < 3 * N + 2; ++i) { | ||
// When there is no SSO, all elements inside should not be poisoned, | ||
// so we can verify deque poisoning. | ||
d4.push_front(get_s<S, 33>(i % 10 + 'a')); | ||
verify_inside(d4); | ||
assert(is_double_ended_contiguous_container_asan_correct(d4)); | ||
d4.push_back(get_s<S, 28>(i % 10 + 'b')); | ||
verify_inside(d4); | ||
assert(is_double_ended_contiguous_container_asan_correct(d4)); | ||
} | ||
} | ||
{ | ||
C d5; | ||
for (size_t i = 0; i < 3 * N + 2; ++i) { | ||
// In d4 we never had poisoned memory inside deque. | ||
// Here we start with SSO, so part of the inside of the container, | ||
// will be poisoned. | ||
d5.push_front(S()); | ||
verify_inside(d5); | ||
} | ||
for (size_t i = 0; i < d5.size(); ++i) { | ||
// We change the size to have long string. | ||
// Memory owne by deque should not be poisoned by string. | ||
d5[i].resize(100); | ||
verify_inside(d5); | ||
} | ||
|
||
assert(is_double_ended_contiguous_container_asan_correct(d5)); | ||
|
||
d5.erase(d5.begin() + 2); | ||
verify_inside(d5); | ||
|
||
d5.erase(d5.end() - 2); | ||
verify_inside(d5); | ||
|
||
assert(is_double_ended_contiguous_container_asan_correct(d5)); | ||
} | ||
{ | ||
C d6a; | ||
assert(is_double_ended_contiguous_container_asan_correct(d6a)); | ||
|
||
C d6b(N + 2, get_s<S, 100>('a')); | ||
d6b.push_front(get_s<S, 101>('b')); | ||
while (!d6b.empty()) { | ||
d6b.pop_back(); | ||
assert(is_double_ended_contiguous_container_asan_correct(d6b)); | ||
} | ||
|
||
C d6c(N + 2, get_s<S, 102>('c')); | ||
while (!d6c.empty()) { | ||
d6c.pop_back(); | ||
assert(is_double_ended_contiguous_container_asan_correct(d6c)); | ||
} | ||
} | ||
{ | ||
C d7(9 * N + 2); | ||
|
||
d7.insert(d7.begin() + 1, S()); | ||
verify_inside(d7); | ||
|
||
d7.insert(d7.end() - 3, S()); | ||
verify_inside(d7); | ||
|
||
d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a')); | ||
verify_inside(d7); | ||
|
||
d7.insert(d7.end() - 2 * N, get_s<S, 1>('b')); | ||
verify_inside(d7); | ||
|
||
d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c')); | ||
verify_inside(d7); | ||
|
||
// It may not be short for big element types, but it will be checked correctly: | ||
d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d')); | ||
verify_inside(d7); | ||
|
||
d7.erase(d7.begin() + 2); | ||
verify_inside(d7); | ||
|
||
d7.erase(d7.end() - 2); | ||
verify_inside(d7); | ||
} | ||
} | ||
|
||
template <class S> | ||
void test_container() { | ||
test_string<std::deque<S, std::allocator<S>>, S>(); | ||
test_string<std::deque<S, min_allocator<S>>, S>(); | ||
test_string<std::deque<S, safe_allocator<S>>, S>(); | ||
} | ||
|
||
int main(int, char**) { | ||
// Those tests support only types based on std::basic_string. | ||
test_container<std::string>(); | ||
test_container<std::wstring>(); | ||
#if TEST_STD_VER >= 11 | ||
test_container<std::u16string>(); | ||
test_container<std::u32string>(); | ||
#endif | ||
#if TEST_STD_VER >= 20 | ||
test_container<std::u8string>(); | ||
#endif | ||
|
||
return 0; | ||
} |
56 changes: 56 additions & 0 deletions
56
libcxx/test/libcxx/containers/strings/basic.string/asan_short.pass.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
// REQUIRES: asan | ||
// UNSUPPORTED: c++03 | ||
|
||
// <string> | ||
|
||
// Basic test if ASan annotations work for short strings. | ||
|
||
#include <string> | ||
#include <cassert> | ||
#include <cstdlib> | ||
|
||
#include "asan_testing.h" | ||
#include "min_allocator.h" | ||
#include "test_iterators.h" | ||
#include "test_macros.h" | ||
|
||
extern "C" void __sanitizer_set_death_callback(void (*callback)(void)); | ||
|
||
void do_exit() { exit(0); } | ||
|
||
int main(int, char**) { | ||
{ | ||
typedef cpp17_input_iterator<char*> MyInputIter; | ||
// Should not trigger ASan. | ||
std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v; | ||
char i[] = {'a', 'b', 'c', 'd'}; | ||
|
||
v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 4)); | ||
assert(v[0] == 'a'); | ||
assert(is_string_asan_correct(v)); | ||
} | ||
|
||
__sanitizer_set_death_callback(do_exit); | ||
{ | ||
using T = char; | ||
using C = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>; | ||
const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'}; | ||
C c(std::begin(t), std::end(t)); | ||
assert(is_string_asan_correct(c)); | ||
assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) != | ||
0); | ||
volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away. | ||
assert(false); // if we got here, ASAN didn't trigger | ||
((void)foo); | ||
} | ||
|
||
return 0; | ||
} |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMO ASan shouldn't affect traits of the type. This change definitely needs more discussion.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe there needs to be some ASan handling when relocating objects? We're destroying objects after all, which should probably poison memory.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm happy to incrementally improve string annotations.
Without change of
__trivially_relocatable
and no other changes ASan error happens exactly here:llvm-project/libcxx/include/__memory/uninitialized_algorithms.h
Lines 644 to 646 in 11a6799
__builtin_memcpy
accesses poisoned memory, when objects memory is poisoned.We can unpoison memory there for every type. Then problematic code area would be:
Disadvantage of that over changing the value of
__trivially_relocatable
is the need of unpoisoning memory like that in every function which depends on__libcpp_is_trivially_relocatable
. But it shouldn't be a big problem.What I don't like more is fact that objects after that move won't be poisoned correctly (may result in false negatives).
My idea to fix it is creating a compiler-rt function
__asan_move_annotations
which may be used here instead of__asan_unpoison_memory_region
, making old buffer unpoisoned and the new buffer poisoned in the same way as the old one.If we try to optimize it as much as possible, creating something similar to
__libcpp_is_trivially_relocatable
just for ASan is possible. For example__libcpp_may_memory_be_poisoned
, and calling__asan_unpoison_memory_region/__asan_move_annotations
if and only if that value is true. However,__asan_unpoison_memory_region
should be cheaper than__builtin_memcpy
, so I think we can accept additional cost here as new ASan trait adds a lot of complexity.Poisoning, if happens, happens in the allocator (during deallocation) and we cannot assume anything about it. For example, allocator cleaning memory before freeing it touches the whole memory before annotations are updated. Therefore
__annotate_delete()
in all classes is necessary.But something like
__asan_move_annotations
is possible and I can create it. Then we can go back to one value of__trivially_relocatable
.To put my actions behind my words, I will open a PR with that change Today or Tomorrow.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see that change as two or three PRs.
__memcpy_with_asan
.std::basic_string
.Last thing is trivial.
__memcpy_with_asan
I hope to be fairly easy, but I am not sure what place is the best to add it. Fist one I just drafted, but didn't test it yet. I should finish everything today, unless we have a discussion about changing direction of that work. Let me know what you think.