Security vs Efficiency. A case examine of C, C++ and Rust type implementations.
{“payload”:{“allShortcutsEnabled”:false,”fileTree”:{“writeup/sort_safety”:{“gadgets”:[{“name”:”assets”,”path”:”writeup/sort_safety/assets”,”contentType”:”directory”},{“name”:”text.md”,”path”:”writeup/sort_safety/text.md”,”contentType”:”file”}],”totalCount”:2},”writeup”:{“gadgets”:[{“name”:”glidesort_perf_analysis”,”path”:”writeup/glidesort_perf_analysis”,”contentType”:”directory”},{“name”:”intel_avx512″,”path”:”writeup/intel_avx512″,”contentType”:”directory”},{“name”:”sort_safety”,”path”:”writeup/sort_safety”,”contentType”:”directory”}],”totalCount”:3},””:{“gadgets”:[{“name”:”benches”,”path”:”benches”,”contentType”:”directory”},{“name”:”fuzz-afl”,”path”:”fuzz-afl”,”contentType”:”directory”},{“name”:”fuzz”,”path”:”fuzz”,”contentType”:”directory”},{“name”:”ipnsort”,”path”:”ipnsort”,”contentType”:”directory”},{“name”:”results”,”path”:”results”,”contentType”:”directory”},{“name”:”sort_test_tools”,”path”:”sort_test_tools”,”contentType”:”directory”},{“name”:”src”,”path”:”src”,”contentType”:”directory”},{“name”:”tests”,”path”:”tests”,”contentType”:”directory”},{“name”:”util”,”path”:”util”,”contentType”:”directory”},{“name”:”writeup”,”path”:”writeup”,”contentType”:”directory”},{“name”:”.editorconfig”,”path”:”.editorconfig”,”contentType”:”file”},{“name”:”.gitignore”,”path”:”.gitignore”,”contentType”:”file”},{“name”:”CODE_OF_CONDUCT.md”,”path”:”CODE_OF_CONDUCT.md”,”contentType”:”file”},{“name”:”Cargo.lock”,”path”:”Cargo.lock”,”contentType”:”file”},{“name”:”Cargo.toml”,”path”:”Cargo.toml”,”contentType”:”file”},{“name”:”LICENSE”,”path”:”LICENSE”,”contentType”:”file”},{“name”:”README.md”,”path”:”README.md”,”contentType”:”file”},{“name”:”build.rs”,”path”:”build.rs”,”contentType”:”file”},{“name”:”ideas.txt”,”path”:”ideas.txt”,”contentType”:”file”},{“name”:”notex.txt”,”path”:”notex.txt”,”contentType”:”file”}],”totalCount”:20}},”fileTreeProcessingTime”:7.342414000000001,”foldersToFetch”:[],”reducedMotionEnabled”:null,”repo”:{“id”:526908111,”defaultBranch”:”foremost”,”title”:”sort-research-rs”,”ownerLogin”:”Voultapher”,”currentUserCanPush”:false,”isFork”:false,”isEmpty”:false,”createdAt”:”2022-08-20T11:33:31.000Z”,”ownerAvatar”:”https://avatars.githubusercontent.com/u/6864584?v=4″,”public”:true,”personal”:false,”isOrgOwned”:false},”symbolsExpanded”:false,”treeExpanded”:true,”refInfo”:{“title”:”foremost”,”listCacheKey”:”v0:1694870203.0″,”canEdit”:false,”refType”:”department”,”currentOid”:”bf90e294bd84c7cfe746f1bd718ff133bcb89389″},”path”:”writeup/sort_safety/textual content.md”,”currentUser”:null,”blob”:{“rawLines”:null,”stylingDirectives”:null,”csv”:null,”csvError”:null,”dependabotInfo”:{“showConfigurationBanner”:false,”configFilePath”:null,”networkDependabotPath”:”/Voultapher/sort-research-rs/community/updates”,”dismissConfigurationNoticePath”:”/settings/dismiss-notice/dependabot_configuration_notice”,”configurationNoticeDismissed”:null,”repoAlertsPath”:”/Voultapher/sort-research-rs/safety/dependabot”,”repoSecurityAndAnalysisPath”:”/Voultapher/sort-research-rs/settings/security_analysis”,”repoOwnerIsOrg”:false,”currentUserCanAdminRepo”:false},”displayName”:”textual content.md”,”displayUrl”:”https://github.com/Voultapher/sort-research-rs/blob/foremost/writeup/sort_safety/textual content.md?uncooked=true”,”headerInfo”:{“blobSize”:”28.1 KB”,”deleteInfo”:{“deleteTooltip”:”You should be signed in to make or suggest adjustments”},”editInfo”:{“editTooltip”:”You should be signed in to make or suggest adjustments”},”ghDesktopPath”:”https://desktop.github.com”,”gitLfsPath”:null,”onBranch”:true,”shortPath”:”e409677″,”siteNavLoginPath”:”/login?return_to=httpspercent3Apercent2Fpercent2Fgithub.compercent2FVoultapherpercent2Fsort-research-rspercent2Fblobpercent2Fmainpercent2Fwriteuppercent2Fsort_safetypercent2Ftext.md”,”isCSV”:false,”isRichtext”:true,”toc”:[{“level”:1,”text”:”Safety vs Performance. A case study of C, C++ and Rust sort implementations.”,”anchor”:”safety-vs-performance-a-case-study-of-c-c-and-rust-sort-implementations”,”htmlText”:”Safety vs Performance. A case study of C, C++ and Rust sort implementations.”},{“level”:2,”text”:”Introduction”,”anchor”:”introduction”,”htmlText”:”Introduction”},{“level”:2,”text”:”Safe-to-use abstractions”,”anchor”:”safe-to-use-abstractions”,”htmlText”:”Safe-to-use abstractions”},{“level”:3,”text”:”Ord safety”,”anchor”:”ord-safety”,”htmlText”:”Ord safety”},{“level”:3,”text”:”Exception safety”,”anchor”:”exception-safety”,”htmlText”:”Exception safety”},{“level”:3,”text”:”Observation safety”,”anchor”:”observation-safety”,”htmlText”:”Observation safety”},{“level”:2,”text”:”Results”,”anchor”:”results”,”htmlText”:”Results”},{“level”:3,”text”:”Tested sort implementations”,”anchor”:”tested-sort-implementations”,”htmlText”:”Tested sort implementations”},{“level”:4,”text”:”Stable”,”anchor”:”stable”,”htmlText”:”Stable”},{“level”:4,”text”:”Unstable”,”anchor”:”unstable”,”htmlText”:”Unstable”},{“level”:3,”text”:”Property analysis”,”anchor”:”property-analysis”,”htmlText”:”Property analysis”},{“level”:3,”text”:”Observations”,”anchor”:”observations”,”htmlText”:”Observations”},{“level”:2,”text”:”Performance”,”anchor”:”performance”,”htmlText”:”Performance”},{“level”:3,”text”:”Benchmark setup”,”anchor”:”benchmark-setup”,”htmlText”:”Benchmark setup”},{“level”:3,”text”:”Results”,”anchor”:”results-1″,”htmlText”:”Results”},{“level”:4,”text”:”Stable”,”anchor”:”stable-1″,”htmlText”:”Stable”},{“level”:4,”text”:”Unstable”,”anchor”:”unstable-1″,”htmlText”:”Unstable”},{“level”:2,”text”:”libc++’s (libcxx) new std::sort”,”anchor”:”libcs-libcxx-new-stdsort”,”htmlText”:”libc++’s (libcxx) new std::sort”},{“level”:2,”text”:”Author’s conclusion and opinion”,”anchor”:”authors-conclusion-and-opinion”,”htmlText”:”Author’s conclusion and opinion”},{“level”:2,”text”:”Thanks”,”anchor”:”thanks”,”htmlText”:”Thanks”}],”lineInfo”:{“truncatedLoc”:”307″,”truncatedSloc”:”222″},”mode”:”file”},”picture”:false,”isCodeownersFile”:null,”isPlain”:false,”isValidLegacyIssueTemplate”:false,”issueTemplateHelpUrl”:”https://docs.github.com/articles/about-issue-and-pull-request-templates”,”issueTemplate”:null,”discussionTemplate”:null,”language”:”Markdown”,”languageID”:222,”massive”:false,”loggedIn”:false,”newDiscussionPath”:”/Voultapher/sort-research-rs/discussions/new”,”newIssuePath”:”/Voultapher/sort-research-rs/points/new”,”planSupportInfo”:{“repoIsFork”:null,”repoOwnedByCurrentUser”:null,”requestFullPath”:”/Voultapher/sort-research-rs/blob/foremost/writeup/sort_safety/textual content.md”,”showFreeOrgGatedFeatureMessage”:null,”showPlanSupportBanner”:null,”upgradeDataAttributes”:null,”upgradePath”:null},”publishBannersInfo”:{“dismissActionNoticePath”:”/settings/dismiss-notice/publish_action_from_dockerfile”,”dismissStackNoticePath”:”/settings/dismiss-notice/publish_stack_from_file”,”releasePath”:”/Voultapher/sort-research-rs/releases/new?market=true”,”showPublishActionBanner”:false,”showPublishStackBanner”:false},”rawBlobUrl”:”https://github.com/Voultapher/sort-research-rs/uncooked/foremost/writeup/sort_safety/textual content.md”,”renderImageOrRaw”:false,”richText”:”
Writer: Lukas Bergdoll @Voultapher
nDate: 05-10-2023 (DD-MM-YYYY)
n
That is an evaluation of type implementations and their capability, or lack thereof, to keep away from undefined conduct (UB) below numerous utilization situations, and whether or not this impacts their efficiency.
n
TL;DR: The mix of complicated generic implementations striving for efficiency with arbitrary logic in user-defined comparability features, makes generic high-performance type implementations significantly troublesome to implement in a manner that avoids UB below each utilization state of affairs. Even a form implementation utilizing solely memory-safe abstractions is not any assure of UB free adjoining logic. General no correlation between efficiency and security may very well be discovered, nor whether or not secure or unsafe inner abstractions are used. Nevertheless a powerful correlation between being carried out for C or C++ customers and an absence of security presents itself.
n
n
Bias disclaimer. The writer of this evaluation is the writer of ipnsort.
nn
Implementing a form operation with the assistance of computer systems goes again to the early Fifties. The issue assertion is deceptively easy. Take a listing of parts and use a comparability perform that implements a strict weak ordering to swap parts till it is sorted. Now, 70 years later new and extra resource-efficient methods to implement this operation are nonetheless being found. It is an energetic subject of examine in science, BlockQuicksort 2016, ips4o 2017, pdqsort 2021, Multiway Powersort 2022, and lots of extra. There are numerous instructions science is exploring. Environment friendly type implementations operating single threaded on fashionable superscalar, out-of-order and speculative CPUs. Environment friendly implementations operating on a number of such threads. Implementations operating on massively parallel in-order GPUs. Exploration of higher best-case, average-case and worst-case runtime. Exploiting present patterns within the enter knowledge. Exploration of various traits, secure/unstable in-place/allocating and extra. This evaluation focuses on a lesser recognized and talked about facet. How do implementations cope with a user-defined comparability perform that implements arbitrary logic, could not implement a strict weak ordering, could go away the perform with out returning and might modify values as they’re being in contrast.
n
The phrases type implementation and kind algorithm, are explicitly not used interchangeably. Virtually all fashionable implementations are hybrids, utilizing a number of type algorithms.
nn
The Rust language has an idea of secure and unsafe interfaces. A secure interface is sound if the implementation avoids UB irrespective of how it’s used, a property that composes. An implementation composed completely of the utilization of different secure interfaces is taken into account secure. Nevertheless, within the presence of different adjoining unsafe code, additional issues come up. This places the burden on the interface designers and implementers, as a substitute of the customers. The C++ commonplace has an analogous idea, referred to as large and slender contracts, and whereas conceptually it is potential to design interfaces in C++ whose utilization can not result in UB, widespread abstraction together with commonplace library varieties like std::vector
or algorithms like std::type
don’t make such guarantees.
n
Except for rust_crumsort_rs_unstable, all Rust type implementations thought of on this evaluation use unsafe abstractions of their implementations.
n
Ord safety
n
What occurs if the user-defined kind or comparability perform doesn’t implement a strict weak ordering? The C++ commonplace library type interface makes it trivial to set off this case:
n
C++:
n
type(knowledge.start(), knowledge.finish(), [](const auto& a, const auto& b) {n return a <= b; // appropriate can be a < b.n});
n
The Rust commonplace library type interface avoids this drawback in lots of instances, by requiring the user-defined comparability perform to return the Ordering
kind as a substitute of a bool, however it’s nonetheless potential:
n
Rust:
n
knowledge.sort_by(|a, b| {n if a == b {n return Ordering::Much less;n }nn a.cmp(b)n});
n
The query what occurs if the comparability perform doesn’t implement a strict weak ordering may be answered by establishing experiments and measuring the outcomes for numerous implementations. The query what ought to occur is trickier to reply. Adjoining is the query what’s and is not allowed to occur with a view to keep away from UB in each state of affairs.
n
Say the person desires to type this enter of integers:
nn
By mistake a comparability perform is supplied which doesn’t implement the required strict weak ordering. What are potential outcomes?
n
A: [2, 3, 9, 3, 1, 6]nB: [3, 2, 1, 3, 9, 6] + exception/panicnC: [1, 3, 3, 9, 9, 6]nD: [3, 3, 0, 0, 7, 9]nE: Runs forevernF: UBn
n
By definition the outcome can’t be the enter sorted by the predicate, the idea of “sorted” is nonsense with out strict weak ordering. But not all potential outcomes are equal. Variant A returns after a while to the person and leaves the enter in an unspecified order. The set of parts stays the identical. Variant B is identical, with addition of raisingnan exception in C++ and a panic in Rust informing the person of the logic bug of their program. Variant C additionally returns after a while however duplicated some parts and “looses” some parts. Variant D “invents” new parts that had been by no means discovered within the unique enter. Variant E by no means returns to the person. And Variant F may very well be a variety issues like an out-of-bounds learn that causes a CPU MMU exception, unlawful CPU directions, stack smashing, altering unrelated program state and extra.
n
If the kind operation is known as a sequence of swaps, C, D, E and F can all be fairly stunning. How may they result in UB?
n
- n
- C: The duplication normally occurs on the bit stage, ignoring kind semantics. If the ingredient kind is for instance a
unique_ptr<int32_t>
/Field<i32>
, these varieties assume distinctive possession of an allocation. And their destructors will hand a pointer to the allocator for liberating. A bitwise copy ends in use-after-free UB, probably within the type of a double-free. - D: Similar as Variant C, with the addition of arbitrary UB normally attributable to deciphering uninitialized reminiscence as a legitimate occupancy of a kind.
- E: Possibly UB, LLVM specifies such infinite loops with out side-effect as UB, as does C++.
n
n
n
n
Exception safety
n
C++ and Rust are each languages with scope primarily based destructors (RAII), and stack unwinding. Collectively they show a strong abstraction for handbook reminiscence administration. On the similar time, they’ll make implementing generic code extra complicated. Each single level within the type implementation that calls the user-provided comparability perform, should assume that the decision could return through an exception in C++ or panic in Rust.
n
C++:
n
type(knowledge.start(), knowledge.finish(), [](const auto& a, const auto& b) {n if (some_condition(a, b)) {n throw std::runtime_error{"unwind"};n }nn return a < b;n});
n
Rust:
n
knowledge.sort_by(|a, b| {n if some_condition(a, b) {n panic!("unwind");n }nn a.cmp(b)n});
n
In follow an absence of exception security manifests itself within the variants C and or D described within the part about Ord security.
n
Observation safety
n
Each C++ and Rust supply methods to mutate a price by way of a const/shared reference. In Rust that is referred to as inside mutability. C++ achieves this with the assistance of the mutable
kind specifier, whereas Rust builds safe-to-use abstractions on high of the language builtin UnsafeCell
. As a consequence of this it is potential to watch each name to the user-provided comparability perform as a stack worth modification. As quickly as auxiliary reminiscence, be it stack or heap, is used, unsafe bitwise duplications of the thing are carried out. If such a duplicated ingredient is used as enter to the user-provided comparability perform, it might be modified in a manner that should be noticed when the kind completes, both by returning usually or by elevating an exception/panic. A benign state of affairs with stunning penalties can be counting the comparisons carried out by incrementing a counter held throughout the parts inside every name to the user-provided comparability perform. If the property of assured observable comparability isn’t met, the outcome could also be wildly inaccurate in describing what number of instances the user-provided comparability perform has been referred to as. A extra problematic state of affairs invoking UB can be, a user-defined kind holding a pointer that’s conditionally freed and set to null as a part of the user-provided comparability perform. If this modification isn’t noticed after the kind completes, code that depends on a null-pointer examine to see if it was already freed will run into use-after-free UB.
n
C++:
n
struct ValWithPtr {n int32_t val;n mutable uint8_t* buffer;n size_t buffer_len;nn ~ValWithPtr() {n if (buffer) {n free(buffer);n }n }n};nnstd::type(knowledge, knowledge + len, [&some_condition](const auto& a, const auto& b) {n if (some_condition(a, b)) {n free(a.buffer);n a.buffer = nullptr;n free(b.buffer);n b.buffer = nullptr;n }nn return a.val < b.val; n});
n
Rust:
n
pub struct ValWithPtr {n val: i32,n buffer: Cell<Choice<Field<[u8]>>>,n}nndata.sort_by(|a, b| {n if some_condition(a, b) {n a.buffer.set(None);n b.buffer.set(None);n }nn a.val.cmp(&b.val)n});
n
The C language has no constructs that will enable secure modifications by way of a const/shared pointer, as such the examined C primarily based type implementations understandably fail this property.
nn
Tested sort implementations
n
Stable
n
- rust_std_stable | `slice::type` https://github.com/rust-lang/rust (Vendored mid 2022)n- rust_wpwoodjr_stable | https://github.com/wpwoodjr/rust-merge-sort (Vendored mid 2022)n- rust_glidesort_stable | https://github.com/orlp/glidesort (model 0.1.2)n- cpp_std_gnu_stable | libstdc++ `std::stable_sort` (gcc 13.2.1)n- cpp_std_libcxx_stable | libc++ `std::stable_sort` (clang 16.0.6)n- cpp_std_msvc_stable | MSVC `std::stable_sort` (runtime library model 14.30)n- cpp_powersort_stable | https://github.com/sebawild/powersort (Vendored mid 2022)n- cpp_powersort_4way_stable | https://github.com/sebawild/powersort (Vendored mid 2022)n- c_fluxsort_stable | https://github.com/scandum/fluxsort (Vendored early 2023)n
n
Unstable
n
- rust_std_unstable | `slice::sort_unstable` https://github.com/rust-lang/rust (Vendored mid 2022)n- rust_dmsort_unstable | https://github.com/emilk/drop-merge-sort (model 1.0)n- rust_ipnsort_unstable | https://github.com/Voultapher/sort-research-rs/tree/foremost/ipnsort (757478b)n- rust_crumsort_rs_unstable | https://github.com/google/crumsort-rs (model 0.1)n- cpp_std_gnu_unstable | libstdc++ `std::type` (gcc 13.2.1)n- cpp_std_libcxx_unstable | libc++ `std::type` (clang 16.0.6)n- cpp_std_msvc_unstable | MSVC `std::type` (runtime library model 14.30)n- cpp_pdqsort_unstable | https://github.com/orlp/pdqsort (Vendored mid 2022)n- cpp_ips4o_unstable | https://github.com/ips4o/ips4o (Vendored mid 2022)n- cpp_blockquicksort_unstable | https://github.com/weissan/BlockQuicksort (Vendored mid 2022)n- c_crumsort_unstable | https://github.com/scandum/crumsort (Vendored early 2023)n
n
Property analysis
n
Properties:
n
- n
- Purposeful: Does the implementation efficiently cross the take a look at suite of various enter patterns and supported varieties?
- Generic: Does the implementation help arbitrary user-defined varieties?
- Ord security: What occurs if the user-defined kind or comparability perform doesn’t implement a strict weak ordering. E.g. in C++ your comparability perform does
[](const auto& a, const auto& b) { return a.x <= b.x; }
? O == unspecified order however unique parts, E == exception/panic and unspecified order however unique parts, L == infinite loop, C == crash, e.g. heap-buffer-overflow (UB), D unspecified order with duplicates. Solely O and E are secure. - Exception security: What occurs, if the person supplied comparability perform throws an exception/panic? ✅ means it retains the unique enter set in an unspecified order, ???? means it might have duplicated parts within the enter.
- Observable comp: If the kind has inside mutability, will each modification attributable to calling the user-defined comparability perform with const/shared references be seen within the enter, after the kind perform returns 1: usually 2: panic. If exception security isn’t given, it’s virtually unimaginable to attain 2. right here.
- Miri: Does the test-suite cross if run below Miri? S: utilizing the Stacked Borrows aliasing mannequin. T: utilizing the Tree Borrows aliasing mannequin.
n
n
n
n
n
n
n
Title | Purposeful | Generic | Ord security | Exception security | Commentary security | Miri |
---|---|---|---|---|---|---|
rust_std_stable | ✅ | ✅ | O ✅ | ✅ | 1: ✅ 2: ✅ | S: ✅ T: ✅ |
rust_wpwoodjr_stable | ✅ | ✅ | O ✅ | ✅ | 1: ✅ 2: ✅ | S: ???? T: ✅ |
rust_glidesort_stable | ✅ | ✅ | O ✅ | ✅ | 1: ✅ 2: ✅ | S: ✅ T: ✅ |
cpp_std_gnu_stable | ✅ | ✅ | C ???? | ???? | 1: ✅ 2: ???? | – |
cpp_std_libcxx_stable | ✅ | ✅ | O ✅ | ???? | 1: ✅ 2: ???? | – |
cpp_std_msvc_stable | ✅ | ✅ | C ???? | ???? | 1: ✅ 2: ???? | – |
cpp_powersort_stable | ✅ | O ✅ | ???? | 1: ✅ 2: ???? | – | |
cpp_powersort_4way_stable | ✅ | O ✅ | ???? | 1: ✅ 2: ???? | – | |
c_fluxsort_stable | ✅ | C ???? | ???? (5) | 1: ???? 2: ???? (7) | – | |
rust_std_unstable | ✅ | ✅ | O ✅ | ✅ | 1: ✅ 2: ✅ | S: ✅ T: ✅ |
rust_dmsort_unstable | ✅ | ✅ | O ✅ | ✅ | 1: ✅ 2: ???? | S: ???? T: |
rust_ipnsort_unstable | ✅ | ✅ | O or E ✅ | ✅ | 1: ✅ 2: ✅ | S: ✅ T: ✅ |
rust_crumsort_rs_unstable | ✅ | D ???? | ???? (6) | 1: – 2: – | S: |
|
cpp_std_gnu_unstable | ✅ | ✅ | C ???? | ???? | 1: ✅ 2: ???? | – |
cpp_std_libcxx_unstable | ✅ | ✅ | L or C ???? | ???? | 1: ✅ 2: ???? | – |
cpp_std_msvc_unstable | ✅ | ✅ | C ???? | ???? | 1: ✅ 2: ???? | – |
cpp_pdqsort_unstable | ✅ | ✅ | L or C ???? | ???? | 1: ✅ 2: ???? | – |
cpp_ips4o_unstable | ✅ | ✅ | C ???? | ???? | 1: ???? 2: ???? | – |
cpp_blockquicksort_unstable | ✅ | ✅ | C ???? | ???? | 1: ✅ 2: ???? | – |
c_crumsort_unstable | ✅ | C ???? | ???? (5) | 1: ???? 2: ???? (7) | – |
n
Footnotes:
n
- n
- cpp_powersort_stable makes use of
vector::resize
for it is buffer, requiring thatT
is default constructible. - cpp_powersort_4way_stable makes use of
vector::resize
for it is buffer, requiring thatT
is default constructible. cpp_powersort_4way_stable affords many configuration choices, one among them is a template parameter referred to asmergingMethod
.GENERAL_BY_STAGES
helps all user-defined varieties, however is comparatively gradual.WILLEM_TUNED
is quicker however requires a sentinel worth for instanceu64::MAX
, nonetheless this has the impact that the implementation can now not accurately type slices that include the sentinel, making it unsuitable for a normal objective type. - c_fluxsort_stable and c_crumsort_unstable use auxiliary stack and heap reminiscence which may be under-aligned for varieties with alignment bigger than basic alignment. The kind interface requires both, large performance sacrifices or supply stage modification.
- rust_crumsort_rs_unstable limits itself to varieties that implement the
Copy
Trait, this consists of varieties like integers however excludes every type with user-defined destructors likeString
and presently varieties with inside mutability likeCell<i32>
. - c_fluxsort_stable and c_crumsort_unstable are developed as C primarily based kinds. C has no idea of exceptions, or stack unwinding. So this property is barely related if the code is compiled as C++ code.
- By limiting itself to varieties with trivial destructors rust_crumsort_rs_unstable avoids UB as a direct consequence of a panic throughout a comparability. Nevertheless is breaks the idea that calling
type
will retain the unique set of parts after the decision, which might result in UB in adjoining logic that makes use of unsafe code in any other case thought of sound. Though rust_crumsort_rs_unstable makes use of zero traces of unsafe Rust, failing to uphold an intuitive and presently below documented property of type it might break different unsafe code that depends on this assumption. - c_fluxsort_stable and c_crumsort_unstable are developed as C primarily based kinds. C has no idea of inside mutability.
- Passes all assessments besides people who failed for causes not distinctive to the sort of checks Miri performs.
n
n
n
n
n
n
n
n
n
Observations
n
- n
- Some notionally generic implementations, don’t help each sort of user-defined kind potential of their respective implementation language. This consists of cpp_powersort_stable, cpp_powersort_4way_stable, c_fluxsort_stable, rust_crumsort_rs_unstable, c_crumsort_unstable.
- Except for rust_crumsort_rs_unstable (reported issue), which is a port of the C primarily based c_crumsort_unstable, all Rust primarily based implementations are Ord secure. The one C and C++ implementations that keep away from UB in such situations are, cpp_std_libcxx_stable and the powersort household of implementations. It is unclear if that is by design or accidentally. rust_ipnsort_unstable goes additional when it comes to usability by elevating a recoverable and deterministic panic if an Ord violation is detected, informing the person of a logic bug.
- Except for rust_crumsort_rs_unstable (reported issue), which is a port of the C primarily based c_crumsort_unstable, all Rust primarily based implementations are Exception secure. The truth that not one of the examined C++ implementations are exception secure, could appear to point that such a property is unimaginable in C++. Nevertheless that’s simply refutable by exhibiting that this challenge solely arises with the utilization of auxiliary reminiscence. And the utilization of auxiliary reminiscence may be lowered within the temporal area to the short-term worth used for swapping 2 parts. Exception secure type implementation can exist in C++, even with no runtime overhead for unaffected varieties by utilizing scope guards. The shortage of exception security is indicative of various interface and utilization expectations by the library authors. And whereas it might appear tempting to modify to some quicker implementation for varieties which are
std::is_trivially_destructible
/!core::mem::needs_drop
, doing so would nonetheless threat issues for varieties resembling uncooked pointers, which are trivially destructible. And could also be used to type by indirection. A major quantity of actual world code assumes {that a} type includes altering the order of parts. Considerably much less code handles the fact that extensively used implementations might also change the set of supplied parts, changing some with duplicates of others. Which within the case of pointers shortly results in UB resembling double-free use-after-free. Even particular casing for builtin integer varieties may result in issues, within the presence of a user-defined comparability perform. These integers could also be interpreted as pointers, regardless that such code has pointer-provenance issues, such code is presently a standard prevalence in actual world code. - Except for cpp_ips4o_unstable, all Rust and C++ implementations present the primary sort of remark security. C implementations solely must care about it, if they’re compiled as C++ code. The presence of remark security in all C++ std library implementations signifies, that such utilization is present in actual world code the distributors care about, even when the usual leaves this conduct unspecified.
- rust_dmsort_unstable reveals that even when exception security and the primary sort of remark security are given, having the second sort of remark security isn’t a assure. (reported issue)
- Just like C++, the language guidelines of Rust are outlined when it comes to an summary machine and never one concrete implementation. And UB is a violation of stated guidelines, a violation that’s assumed won’t ever occur. Thus it’s potential to carry out sure optimizations below the assumptions that sure properties which are difficult or unimaginable to show for the compiler are true. For extra details about this, see this talk by Chandler Carruth. Miri is a instrument to run Rust code inside a digital machine that tries to seek out UB in unsafe code, by strictly modeling stated summary machine, checking as many properties as potential. It checks alignment, aliasing and extra. Aliasing particularly continues to be an open matter in Rust. Miri makes use of by default the Stacked Borrows aliasing mannequin for unsafe Rust code. And whereas code could fail below Miri it isn’t at all times clear if that may be a critical concern. This may solely be answered as soon as Rust has settled on an aliasing mannequin. With the introduction of Tree Borrows there are actually two completely different aliasing fashions that can be utilized with Miri. Inadvertently these outcomes validate one of many objectives of Tree Borrows, having an aliasing mannequin that avoids stunning authors of unsafe code. Each rust_wpwoodjr_stable and rust_dmsort_unstable fail the take a look at suite with Stacked Borrows and cross it below the foundations set by Tree Borrows.
n
n
n
n
n
n
nn
Benchmark setup
n
Linux 6.5nrustc 1.75.0-nightly (187b8131d 2023-10-03)nclang model 16.0.6ngcc model 13.2.1nAMD Ryzen 9 5900X 12-Core Processor (Zen 3 micro-architecture)nCPU increase enabled.n
n
Rust code constructed with --release
and lto=skinny
and native code construct settings may be discovered here.
n
Some type implementations are adaptive, they are going to attempt to exploit present patterns within the knowledge to do much less work. A breakdown of the benchmark patterns:
n
- n
ascending
, numbers0..len
descending
, numbers0..len
reversedrandom
, random numbers generated by randStdRng::gen
[2]random_d20
, uniform random numbers within the vary0..=20
random_p5
, 95% 0 and 5% random knowledge, not uniformrandom_s95
, 95% sorted adopted by 5% unsorted, simulates append -> typerandom_z1
, Zipfian distribution with characterizing exponent s == 1.0 [3]
n
n
n
n
n
n
n
n
Results
n
Solely 10k u64
are examined on Zen 3. A extra in depth have a look at efficiency of unstable type implementations here and efficiency of secure type implementations here. Beware updates to a few of the examined implementations and toolchains make the outcomes in a roundabout way comparable with the outcomes under.
n
Stable
n
n
Unstable
n
nn
That is an try at untangling the replace to libc++’s std::type
. A tough timeline to the very best of the writer’s information:
nn
Because it stands the up to date std::type
is on monitor for LLVM 17. So as to add to the confusion, the PR title and weblog submit speak about a partition implementation primarily based on BlockQuickSort, however the merged model makes use of an analogous however notably completely different implementation derived from bitsetsort.
nn
As seen within the benchmarks, the present Rust commonplace library unstable type implementation outperforms the C++ commonplace library counterparts. And that regardless of offering a considerably safer to make use of implementation. glidesort and ipnsort reveal that these properties may be upheld even in state-of-the-art high-performance implementations. The kind implementations within the C++ commonplace libraries are normally fairly outdated, which might clarify their poor efficiency. But even comparatively new C++ implementations resembling ips4o disregard utilization security fully, even regressing Commentary security in comparison with the examined commonplace library implementations. The brand new and to this point untested libc++ implementation reveals consciousness of a few of the analyzed security properties, primarily Ord security, however fails to discover a technique to assure UB free utilization. Solely going so far as performing an elective opt-in out-of-bounds (OOB) examine. Disregarding the problems of duplicate parts and exception security. Which comes as little bit of a shock given the discharge date of 2022, 5 years after the pdqsort-derived unstable type in Rust was merged in 2017. I see no cause why a straight port from Rust to C++ would not have been potential whereas satisfying their necessities. The writer Danila Kutenin even mentions the Rust implementation of their weblog submit, so I assume they understand it. To me the Outcomes throughout all examined implementations is indicative of a pervasive mindset within the C and C++ world, that argues it is the customers accountability to watch out, even when that has been proven impossible at scale. Personally I’ve spent a number of days debugging some code at work that broke in very unusual methods, and was attributable to by accident writing <=
as a substitute of <
in a comparability perform, affecting logic in a totally completely different place. Usually security and efficiency are characterised as a set of zero sum tradeoffs, but typically it is potential to seek out higher tradeoffs who’s holistic properties enhance upon a beforehand seen “both or”. Taking the 1 to N relationship of foundational library authors to library customers into consideration, the affect of safe-to-use abstractions ought to change into obvious.
nn
Thanks Klaus Iglberger for offering detailed suggestions and priceless solutions on bettering the readability of this writeup.
n
“,”renderedFileInfo”:null,”shortPath”:null,”tabSize”:8,”topBannersInfo”:{“overridingGlobalFundingFile”:false,”globalPreferredFundingPath”:null,”repoOwner”:”Voultapher”,”repoName”:”sort-research-rs”,”showInvalidCitationWarning”:false,”citationHelpUrl”:”https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files”,”showDependabotConfigurationBanner”:false,”actionsOnboardingTip”:null},”truncated”:false,”viewable”:true,”workflowRedirectUrl”:null,”symbols”:{“timedOut”:false,”notAnalyzed”:false,”symbols”:[{“name”:” Safety vs Performance. A case study of C, C++ and Rust sort implementations.”,”kind”:”section_1″,”identStart”:1,”identEnd”:78,”extentStart”:0,”extentEnd”:28769,”fullyQualifiedName”:” Safety vs Performance. A case study of C, C++ and Rust sort implementations.”,”identUtf16″:{“start”:{“lineNumber”:0,”utf16Col”:1},”end”:{“lineNumber”:0,”utf16Col”:78}},”extentUtf16″:{“start”:{“lineNumber”:0,”utf16Col”:0},”end”:{“lineNumber”:307,”utf16Col”:0}}},{“name”:” Introduction”,”kind”:”section_2″,”identStart”:1047,”identEnd”:1060,”extentStart”:1045,”extentEnd”:2763,”fullyQualifiedName”:” Introduction”,”identUtf16″:{“start”:{“lineNumber”:14,”utf16Col”:2},”end”:{“lineNumber”:14,”utf16Col”:15}},”extentUtf16″:{“start”:{“lineNumber”:14,”utf16Col”:0},”end”:{“lineNumber”:20,”utf16Col”:0}}},{“name”:” Safe-to-use abstractions”,”kind”:”section_2″,”identStart”:2765,”identEnd”:2790,”extentStart”:2763,”extentEnd”:10081,”fullyQualifiedName”:” Safe-to-use abstractions”,”identUtf16″:{“start”:{“lineNumber”:20,”utf16Col”:2},”end”:{“lineNumber”:20,”utf16Col”:27}},”extentUtf16″:{“start”:{“lineNumber”:20,”utf16Col”:0},”end”:{“lineNumber”:162,”utf16Col”:0}}},{“name”:” Ord safety”,”kind”:”section_3″,”identStart”:3694,”identEnd”:3705,”extentStart”:3691,”extentEnd”:6819,”fullyQualifiedName”:” Ord safety”,”identUtf16″:{“start”:{“lineNumber”:26,”utf16Col”:3},”end”:{“lineNumber”:26,”utf16Col”:14}},”extentUtf16″:{“start”:{“lineNumber”:26,”utf16Col”:0},”end”:{“lineNumber”:80,”utf16Col”:0}}},{“name”:” Exception safety”,”kind”:”section_3″,”identStart”:6822,”identEnd”:6839,”extentStart”:6819,”extentEnd”:7690,”fullyQualifiedName”:” Exception safety”,”identUtf16″:{“start”:{“lineNumber”:80,”utf16Col”:3},”end”:{“lineNumber”:80,”utf16Col”:20}},”extentUtf16″:{“start”:{“lineNumber”:80,”utf16Col”:0},”end”:{“lineNumber”:110,”utf16Col”:0}}},{“name”:” Observation safety”,”kind”:”section_3″,”identStart”:7693,”identEnd”:7712,”extentStart”:7690,”extentEnd”:10081,”fullyQualifiedName”:” Observation safety”,”identUtf16″:{“start”:{“lineNumber”:110,”utf16Col”:3},”end”:{“lineNumber”:110,”utf16Col”:22}},”extentUtf16″:{“start”:{“lineNumber”:110,”utf16Col”:0},”end”:{“lineNumber”:162,”utf16Col”:0}}},{“name”:” Results”,”kind”:”section_2″,”identStart”:10083,”identEnd”:10091,”extentStart”:10081,”extentEnd”:23507,”fullyQualifiedName”:” Results”,”identUtf16″:{“start”:{“lineNumber”:162,”utf16Col”:2},”end”:{“lineNumber”:162,”utf16Col”:10}},”extentUtf16″:{“start”:{“lineNumber”:162,”utf16Col”:0},”end”:{“lineNumber”:250,”utf16Col”:0}}},{“name”:” Tested sort implementations”,”kind”:”section_3″,”identStart”:10096,”identEnd”:10124,”extentStart”:10093,”extentEnd”:11907,”fullyQualifiedName”:” Tested sort implementations”,”identUtf16″:{“start”:{“lineNumber”:164,”utf16Col”:3},”end”:{“lineNumber”:164,”utf16Col”:31}},”extentUtf16″:{“start”:{“lineNumber”:164,”utf16Col”:0},”end”:{“lineNumber”:196,”utf16Col”:0}}},{“name”:” Stable”,”kind”:”section_4″,”identStart”:10130,”identEnd”:10137,”extentStart”:10126,”extentEnd”:10931,”fullyQualifiedName”:” Stable”,”identUtf16″:{“start”:{“lineNumber”:166,”utf16Col”:4},”end”:{“lineNumber”:166,”utf16Col”:11}},”extentUtf16″:{“start”:{“lineNumber”:166,”utf16Col”:0},”end”:{“lineNumber”:180,”utf16Col”:0}}},{“name”:” Unstable”,”kind”:”section_4″,”identStart”:10935,”identEnd”:10944,”extentStart”:10931,”extentEnd”:11907,”fullyQualifiedName”:” Unstable”,”identUtf16″:{“start”:{“lineNumber”:180,”utf16Col”:4},”end”:{“lineNumber”:180,”utf16Col”:13}},”extentUtf16″:{“start”:{“lineNumber”:180,”utf16Col”:0},”end”:{“lineNumber”:196,”utf16Col”:0}}},{“name”:” Property analysis”,”kind”:”section_3″,”identStart”:11910,”identEnd”:11928,”extentStart”:11907,”extentEnd”:18615,”fullyQualifiedName”:” Property analysis”,”identUtf16″:{“start”:{“lineNumber”:196,”utf16Col”:3},”end”:{“lineNumber”:196,”utf16Col”:21}},”extentUtf16″:{“start”:{“lineNumber”:196,”utf16Col”:0},”end”:{“lineNumber”:241,”utf16Col”:0}}},{“name”:” Observations”,”kind”:”section_3″,”identStart”:18618,”identEnd”:18631,”extentStart”:18615,”extentEnd”:23507,”fullyQualifiedName”:” Observations”,”identUtf16″:{“start”:{“lineNumber”:241,”utf16Col”:3},”end”:{“lineNumber”:241,”utf16Col”:16}},”extentUtf16″:{“start”:{“lineNumber”:241,”utf16Col”:0},”end”:{“lineNumber”:250,”utf16Col”:0}}},{“name”:” Performance”,”kind”:”section_2″,”identStart”:23509,”identEnd”:23521,”extentStart”:23507,”extentEnd”:25217,”fullyQualifiedName”:” Performance”,”identUtf16″:{“start”:{“lineNumber”:250,”utf16Col”:2},”end”:{“lineNumber”:250,”utf16Col”:14}},”extentUtf16″:{“start”:{“lineNumber”:250,”utf16Col”:0},”end”:{“lineNumber”:288,”utf16Col”:0}}},{“name”:” Benchmark setup”,”kind”:”section_3″,”identStart”:23526,”identEnd”:23542,”extentStart”:23523,”extentEnd”:24577,”fullyQualifiedName”:” Benchmark setup”,”identUtf16″:{“start”:{“lineNumber”:252,”utf16Col”:3},”end”:{“lineNumber”:252,”utf16Col”:19}},”extentUtf16″:{“start”:{“lineNumber”:252,”utf16Col”:0},”end”:{“lineNumber”:275,”utf16Col”:0}}},{“name”:” Results”,”kind”:”section_3″,”identStart”:24580,”identEnd”:24588,”extentStart”:24577,”extentEnd”:25217,”fullyQualifiedName”:” Results”,”identUtf16″:{“start”:{“lineNumber”:275,”utf16Col”:3},”end”:{“lineNumber”:275,”utf16Col”:11}},”extentUtf16″:{“start”:{“lineNumber”:275,”utf16Col”:0},”end”:{“lineNumber”:288,”utf16Col”:0}}},{“name”:” Stable”,”kind”:”section_4″,”identStart”:25080,”identEnd”:25087,”extentStart”:25076,”extentEnd”:25144,”fullyQualifiedName”:” Stable”,”identUtf16″:{“start”:{“lineNumber”:279,”utf16Col”:4},”end”:{“lineNumber”:279,”utf16Col”:11}},”extentUtf16″:{“start”:{“lineNumber”:279,”utf16Col”:0},”end”:{“lineNumber”:283,”utf16Col”:0}}},{“name”:” Unstable”,”kind”:”section_4″,”identStart”:25148,”identEnd”:25157,”extentStart”:25144,”extentEnd”:25217,”fullyQualifiedName”:” Unstable”,”identUtf16″:{“start”:{“lineNumber”:283,”utf16Col”:4},”end”:{“lineNumber”:283,”utf16Col”:13}},”extentUtf16″:{“start”:{“lineNumber”:283,”utf16Col”:0},”end”:{“lineNumber”:288,”utf16Col”:0}}},{“name”:” libc++’s (libcxx) new `std::sort`”,”kind”:”section_2″,”identStart”:25219,”identEnd”:25253,”extentStart”:25217,”extentEnd”:26301,”fullyQualifiedName”:” libc++’s (libcxx) new `std::sort`”,”identUtf16″:{“start”:{“lineNumber”:288,”utf16Col”:2},”end”:{“lineNumber”:288,”utf16Col”:36}},”extentUtf16″:{“start”:{“lineNumber”:288,”utf16Col”:0},”end”:{“lineNumber”:300,”utf16Col”:0}}},{“name”:” Author’s conclusion and opinion”,”kind”:”section_2″,”identStart”:26303,”identEnd”:26335,”extentStart”:26301,”extentEnd”:28629,”fullyQualifiedName”:” Author’s conclusion and opinion”,”identUtf16″:{“start”:{“lineNumber”:300,”utf16Col”:2},”end”:{“lineNumber”:300,”utf16Col”:34}},”extentUtf16″:{“start”:{“lineNumber”:300,”utf16Col”:0},”end”:{“lineNumber”:304,”utf16Col”:0}}},{“name”:” Thanks”,”kind”:”section_2″,”identStart”:28631,”identEnd”:28638,”extentStart”:28629,”extentEnd”:28769,”fullyQualifiedName”:” Thanks”,”identUtf16″:{“start”:{“lineNumber”:304,”utf16Col”:2},”end”:{“lineNumber”:304,”utf16Col”:9}},”extentUtf16″:{“start”:{“lineNumber”:304,”utf16Col”:0},”end”:{“lineNumber”:307,”utf16Col”:0}}}]}},”copilotInfo”:null,”csrf_tokens”:{“/Voultapher/sort-research-rs/branches”:{“submit”:”ADsfz2w4d9BZmeTITyE0T0pEzJFfPmJmCqi-mF1My7QAgRRacwLURDeRsZuUCXLxOZS6XstDA5QLhsSHeUdAaA”},”/repos/preferences”:{“submit”:”MUEyRoCgekhGbqbdSfAyEdSJWCI0pjvmJDtgo01lT1xhCkCewSIWu4eWbTu0zYp_4AZeevzpir7V1k3aRBMMrQ”}}},”title”:”sort-research-rs/writeup/sort_safety/textual content.md at foremost · Voultapher/sort-research-rs”}