From 5027a3446bbd64ab69a9baa9f6a524f70257b745 Mon Sep 17 00:00:00 2001 From: panicbit Date: Sun, 9 Jul 2017 19:57:17 +0200 Subject: [PATCH 1/9] Update target json --- ctr-std/3ds.json | 16 ++-------------- ctru-rs/3ds.json | 16 ++-------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/ctr-std/3ds.json b/ctr-std/3ds.json index bdd5caa..f0dfaa9 100644 --- a/ctr-std/3ds.json +++ b/ctr-std/3ds.json @@ -1,5 +1,5 @@ { - "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", + "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", "llvm-target": "arm-none-eabihf", "linker": "arm-none-eabi-gcc", "ar": "arm-none-eabi-ar", @@ -15,17 +15,5 @@ "executables": true, "exe-suffix": ".elf", "panic-strategy": "abort", - "pre-link-args": [ - "-specs=3dsx.specs", - "-march=armv6k", - "-mtune=mpcore", - "-mfloat-abi=hard", - "-mtp=soft" - ], - "post-link-args": [ - "-lc", - "-lm", - "-lsysbase", - "-lc" - ] + "linker-flavor": "gcc" } diff --git a/ctru-rs/3ds.json b/ctru-rs/3ds.json index bdd5caa..f0dfaa9 100644 --- a/ctru-rs/3ds.json +++ b/ctru-rs/3ds.json @@ -1,5 +1,5 @@ { - "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", + "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", "llvm-target": "arm-none-eabihf", "linker": "arm-none-eabi-gcc", "ar": "arm-none-eabi-ar", @@ -15,17 +15,5 @@ "executables": true, "exe-suffix": ".elf", "panic-strategy": "abort", - "pre-link-args": [ - "-specs=3dsx.specs", - "-march=armv6k", - "-mtune=mpcore", - "-mfloat-abi=hard", - "-mtp=soft" - ], - "post-link-args": [ - "-lc", - "-lm", - "-lsysbase", - "-lc" - ] + "linker-flavor": "gcc" } From 72df21a4b352c0288c1999e0d8e8ef7c12c4de02 Mon Sep 17 00:00:00 2001 From: panicbit Date: Sun, 9 Jul 2017 20:03:00 +0200 Subject: [PATCH 2/9] ctr-std: Use liballoc from rust-src --- ctr-std/Cargo.toml | 3 --- ctr-std/Xargo.toml | 2 ++ ctr-std/src/lib.rs | 1 + ctru-rs/Xargo.toml | 2 ++ 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ctr-std/Cargo.toml b/ctr-std/Cargo.toml index b7f6da1..4f09726 100644 --- a/ctr-std/Cargo.toml +++ b/ctr-std/Cargo.toml @@ -11,6 +11,3 @@ path = "../ctr-libc" [dependencies.ctru-sys] path = "../ctru-sys" - -[dependencies.alloc_system] -version = "0.1.1" diff --git a/ctr-std/Xargo.toml b/ctr-std/Xargo.toml index 7d069ba..e1901fa 100644 --- a/ctr-std/Xargo.toml +++ b/ctr-std/Xargo.toml @@ -2,6 +2,8 @@ [dependencies.rand] +[dependencies.alloc_system] + [dependencies.ctr-libc] path = "../ctr-libc" stage = 1 diff --git a/ctr-std/src/lib.rs b/ctr-std/src/lib.rs index 7a520b8..5b8c532 100644 --- a/ctr-std/src/lib.rs +++ b/ctr-std/src/lib.rs @@ -1,4 +1,5 @@ #![feature(alloc)] +#![feature(alloc_system)] #![feature(allow_internal_unstable)] #![feature(box_syntax)] #![feature(cfg_target_has_atomic)] diff --git a/ctru-rs/Xargo.toml b/ctru-rs/Xargo.toml index e912cab..339cf5b 100644 --- a/ctru-rs/Xargo.toml +++ b/ctru-rs/Xargo.toml @@ -2,6 +2,8 @@ [dependencies.rand] +[dependencies.alloc_system] + [dependencies.ctr-libc] path = "../ctr-libc" stage = 1 From 78ca2a83a7d1c576453ea8eceb897f19a2d401ee Mon Sep 17 00:00:00 2001 From: panicbit Date: Sun, 9 Jul 2017 21:32:29 +0200 Subject: [PATCH 3/9] ctr-std: Add heap allocator shim --- ctr-std/src/sys/unix/alloc.rs | 100 ++++++++++++++++++++++++++++++++++ ctr-std/src/sys/unix/mod.rs | 1 + 2 files changed, 101 insertions(+) create mode 100644 ctr-std/src/sys/unix/alloc.rs diff --git a/ctr-std/src/sys/unix/alloc.rs b/ctr-std/src/sys/unix/alloc.rs new file mode 100644 index 0000000..d7eb2f2 --- /dev/null +++ b/ctr-std/src/sys/unix/alloc.rs @@ -0,0 +1,100 @@ +// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use alloc::allocator::{Alloc,Layout,AllocErr,Excess,CannotReallocInPlace}; +use alloc::heap; + +/// Heap allocator that delegates to the default liballoc heap allocator. +/// Its purpose is to override methods while still using the standard alloc api. +#[derive(Copy, Clone, Default, Debug)] +pub struct Heap; + +unsafe impl Alloc for Heap { + #[inline] + unsafe fn alloc(&mut self, layout: Layout) -> Result<*mut u8, AllocErr> { + heap::Heap.alloc(layout) + } + + // A nicer handler for out-of-memory situations than the default one. This + // one prints a message to stderr before aborting. It is critical that this + // code does not allocate any memory since we are in an OOM situation. Any + // errors are ignored while printing since there's nothing we can do about + // them and we are about to exit anyways. + #[inline] + fn oom(&mut self, err: AllocErr) -> ! { + use intrinsics; + use libc::{self, STDERR_FILENO, c_void}; + + let msg = err.description(); + unsafe { + libc::write(STDERR_FILENO, msg.as_ptr() as *const c_void, msg.len()); + libc::write(STDERR_FILENO, "\n".as_ptr() as *const c_void, 1); + intrinsics::abort(); + } + } + + #[inline] + unsafe fn dealloc(&mut self, ptr: *mut u8, layout: Layout) { + heap::Heap.dealloc(ptr, layout) + } + + #[inline] + fn usable_size(&self, layout: &Layout) -> (usize, usize) { + heap::Heap.usable_size(layout) + } + + #[inline] + unsafe fn realloc(&mut self, + ptr: *mut u8, + layout: Layout, + new_layout: Layout) + -> Result<*mut u8, AllocErr> + { + heap::Heap.realloc(ptr, layout, new_layout) + } + + #[inline] + unsafe fn alloc_zeroed(&mut self, layout: Layout) -> Result<*mut u8, AllocErr> { + heap::Heap.alloc_zeroed(layout) + } + + #[inline] + unsafe fn alloc_excess(&mut self, layout: Layout) -> Result { + heap::Heap.alloc_excess(layout) + } + + #[inline] + unsafe fn realloc_excess(&mut self, + ptr: *mut u8, + layout: Layout, + new_layout: Layout) -> Result + { + heap::Heap.realloc_excess(ptr, layout, new_layout) + } + + #[inline] + unsafe fn grow_in_place(&mut self, + ptr: *mut u8, + layout: Layout, + new_layout: Layout) + -> Result<(), CannotReallocInPlace> + { + heap::Heap.grow_in_place(ptr, layout, new_layout) + } + + #[inline] + unsafe fn shrink_in_place(&mut self, + ptr: *mut u8, + layout: Layout, + new_layout: Layout) -> Result<(), CannotReallocInPlace> + { + heap::Heap.shrink_in_place(ptr, layout, new_layout) + } +} diff --git a/ctr-std/src/sys/unix/mod.rs b/ctr-std/src/sys/unix/mod.rs index 4ac7a22..df2be06 100644 --- a/ctr-std/src/sys/unix/mod.rs +++ b/ctr-std/src/sys/unix/mod.rs @@ -29,6 +29,7 @@ pub mod thread; pub mod rand; pub mod thread_local; pub mod time; +pub mod alloc; #[cfg(not(test))] pub fn init() { From d86fadb6c7c7295ed85c46b060ea77b757755056 Mon Sep 17 00:00:00 2001 From: panicbit Date: Sun, 9 Jul 2017 21:37:35 +0200 Subject: [PATCH 4/9] ctr-std: Migrate to the new liballoc API --- ctr-std/src/collections/hash/table.rs | 14 +++++++------- ctr-std/src/lib.rs | 1 + ctr-std/src/sys/unix/mod.rs | 20 -------------------- 3 files changed, 8 insertions(+), 27 deletions(-) diff --git a/ctr-std/src/collections/hash/table.rs b/ctr-std/src/collections/hash/table.rs index eb467bd..bb0fd3c 100644 --- a/ctr-std/src/collections/hash/table.rs +++ b/ctr-std/src/collections/hash/table.rs @@ -10,7 +10,8 @@ #![allow(deprecated)] -use alloc::heap::{EMPTY, allocate, deallocate}; +use sys::alloc::Heap; +use alloc::allocator::{Alloc,Layout}; use cmp; use hash::{BuildHasher, Hash, Hasher}; @@ -622,7 +623,7 @@ impl RawTable { return RawTable { size: 0, capacity: 0, - hashes: Unique::new(EMPTY as *mut HashUint), + hashes: Unique::empty(), marker: marker::PhantomData, }; } @@ -653,10 +654,8 @@ impl RawTable { .expect("capacity overflow"), "capacity overflow"); - let buffer = allocate(size, alignment); - if buffer.is_null() { - ::alloc::oom() - } + let layout = Layout::from_size_align(size, alignment).expect("invalid alloc layout"); + let buffer = Heap.alloc(layout).unwrap_or_else(|err| Heap.oom(err)); let hashes = buffer.offset(hash_offset as isize) as *mut HashUint; @@ -1063,7 +1062,8 @@ impl Drop for RawTable { debug_assert!(!oflo, "should be impossible"); unsafe { - deallocate(self.hashes.as_ptr() as *mut u8, size, align); + let layout = Layout::from_size_align(size, align).expect("invalid alloc layout"); + Heap.dealloc(self.hashes.as_ptr() as *mut u8, layout); // Remember how everything was allocated out of one buffer // during initialization? We only need one call to free here. } diff --git a/ctr-std/src/lib.rs b/ctr-std/src/lib.rs index 5b8c532..af14e12 100644 --- a/ctr-std/src/lib.rs +++ b/ctr-std/src/lib.rs @@ -1,4 +1,5 @@ #![feature(alloc)] +#![feature(allocator_api)] #![feature(alloc_system)] #![feature(allow_internal_unstable)] #![feature(box_syntax)] diff --git a/ctr-std/src/sys/unix/mod.rs b/ctr-std/src/sys/unix/mod.rs index df2be06..9d57045 100644 --- a/ctr-std/src/sys/unix/mod.rs +++ b/ctr-std/src/sys/unix/mod.rs @@ -33,8 +33,6 @@ pub mod alloc; #[cfg(not(test))] pub fn init() { - use alloc::oom; - // By default, some platforms will send a *signal* when an EPIPE error // would otherwise be delivered. This runtime doesn't install a SIGPIPE // handler, causing it to kill the program, which isn't exactly what we @@ -46,24 +44,6 @@ pub fn init() { reset_sigpipe(); } - oom::set_oom_handler(oom_handler); - - // A nicer handler for out-of-memory situations than the default one. This - // one prints a message to stderr before aborting. It is critical that this - // code does not allocate any memory since we are in an OOM situation. Any - // errors are ignored while printing since there's nothing we can do about - // them and we are about to exit anyways. - fn oom_handler() -> ! { - use intrinsics; - let msg = "fatal runtime error: out of memory\n"; - unsafe { - libc::write(libc::STDERR_FILENO, - msg.as_ptr() as *const libc::c_void, - msg.len()); - intrinsics::abort(); - } - } - // I don't think we have signal handling on the 3DS, so let's leave this // blank for now unsafe fn reset_sigpipe() {} From 2538d230b8aa58c8b86af34cd0af3cde11fe9b1e Mon Sep 17 00:00:00 2001 From: panicbit Date: Sun, 9 Jul 2017 23:54:59 +0200 Subject: [PATCH 5/9] Add default allocator symbols --- ctr-std/src/heap.rs | 165 ++++++++++++++++++++++++++++++++++++++++++++ ctr-std/src/lib.rs | 4 +- 2 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 ctr-std/src/heap.rs diff --git a/ctr-std/src/heap.rs b/ctr-std/src/heap.rs new file mode 100644 index 0000000..83bd3b0 --- /dev/null +++ b/ctr-std/src/heap.rs @@ -0,0 +1,165 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! dox + +#![unstable(issue = "32838", feature = "allocator_api")] + +pub use alloc::heap::{Heap, Alloc, Layout, Excess, CannotReallocInPlace, AllocErr}; +#[cfg(not(stage0))] +pub use alloc_system::System; + +#[cfg(all(not(stage0), not(test)))] +#[doc(hidden)] +pub mod __default_lib_allocator { + use super::{System, Layout, Alloc, AllocErr}; + use ptr; + + // for symbol names src/librustc/middle/allocator.rs + // for signatures src/librustc_allocator/lib.rs + + // linkage directives are provided as part of the current compiler allocator + // ABI + + #[no_mangle] + pub unsafe extern fn __rdl_alloc(size: usize, + align: usize, + err: *mut u8) -> *mut u8 { + let layout = Layout::from_size_align_unchecked(size, align); + match System.alloc(layout) { + Ok(p) => p, + Err(e) => { + ptr::write(err as *mut AllocErr, e); + 0 as *mut u8 + } + } + } + + #[no_mangle] + pub unsafe extern fn __rdl_oom(err: *const u8) -> ! { + System.oom((*(err as *const AllocErr)).clone()) + } + + #[no_mangle] + pub unsafe extern fn __rdl_dealloc(ptr: *mut u8, + size: usize, + align: usize) { + System.dealloc(ptr, Layout::from_size_align_unchecked(size, align)) + } + + #[no_mangle] + pub unsafe extern fn __rdl_usable_size(layout: *const u8, + min: *mut usize, + max: *mut usize) { + let pair = System.usable_size(&*(layout as *const Layout)); + *min = pair.0; + *max = pair.1; + } + + #[no_mangle] + pub unsafe extern fn __rdl_realloc(ptr: *mut u8, + old_size: usize, + old_align: usize, + new_size: usize, + new_align: usize, + err: *mut u8) -> *mut u8 { + let old_layout = Layout::from_size_align_unchecked(old_size, old_align); + let new_layout = Layout::from_size_align_unchecked(new_size, new_align); + match System.realloc(ptr, old_layout, new_layout) { + Ok(p) => p, + Err(e) => { + ptr::write(err as *mut AllocErr, e); + 0 as *mut u8 + } + } + } + + #[no_mangle] + pub unsafe extern fn __rdl_alloc_zeroed(size: usize, + align: usize, + err: *mut u8) -> *mut u8 { + let layout = Layout::from_size_align_unchecked(size, align); + match System.alloc_zeroed(layout) { + Ok(p) => p, + Err(e) => { + ptr::write(err as *mut AllocErr, e); + 0 as *mut u8 + } + } + } + + #[no_mangle] + pub unsafe extern fn __rdl_alloc_excess(size: usize, + align: usize, + excess: *mut usize, + err: *mut u8) -> *mut u8 { + let layout = Layout::from_size_align_unchecked(size, align); + match System.alloc_excess(layout) { + Ok(p) => { + *excess = p.1; + p.0 + } + Err(e) => { + ptr::write(err as *mut AllocErr, e); + 0 as *mut u8 + } + } + } + + #[no_mangle] + pub unsafe extern fn __rdl_realloc_excess(ptr: *mut u8, + old_size: usize, + old_align: usize, + new_size: usize, + new_align: usize, + excess: *mut usize, + err: *mut u8) -> *mut u8 { + let old_layout = Layout::from_size_align_unchecked(old_size, old_align); + let new_layout = Layout::from_size_align_unchecked(new_size, new_align); + match System.realloc_excess(ptr, old_layout, new_layout) { + Ok(p) => { + *excess = p.1; + p.0 + } + Err(e) => { + ptr::write(err as *mut AllocErr, e); + 0 as *mut u8 + } + } + } + + #[no_mangle] + pub unsafe extern fn __rdl_grow_in_place(ptr: *mut u8, + old_size: usize, + old_align: usize, + new_size: usize, + new_align: usize) -> u8 { + let old_layout = Layout::from_size_align_unchecked(old_size, old_align); + let new_layout = Layout::from_size_align_unchecked(new_size, new_align); + match System.grow_in_place(ptr, old_layout, new_layout) { + Ok(()) => 1, + Err(_) => 0, + } + } + + #[no_mangle] + pub unsafe extern fn __rdl_shrink_in_place(ptr: *mut u8, + old_size: usize, + old_align: usize, + new_size: usize, + new_align: usize) -> u8 { + let old_layout = Layout::from_size_align_unchecked(old_size, old_align); + let new_layout = Layout::from_size_align_unchecked(new_size, new_align); + match System.shrink_in_place(ptr, old_layout, new_layout) { + Ok(()) => 1, + Err(_) => 0, + } + } +} diff --git a/ctr-std/src/lib.rs b/ctr-std/src/lib.rs index af14e12..cc4825c 100644 --- a/ctr-std/src/lib.rs +++ b/ctr-std/src/lib.rs @@ -1,5 +1,6 @@ #![feature(alloc)] #![feature(allocator_api)] +#![feature(allocator_internals)] #![feature(alloc_system)] #![feature(allow_internal_unstable)] #![feature(box_syntax)] @@ -46,7 +47,7 @@ #![allow(non_camel_case_types, dead_code, unused_features)] #![no_std] - +#![cfg_attr(not(stage0), default_lib_allocator)] #![stable(feature = "rust1", since = "1.0.0")] #[prelude_import] @@ -169,6 +170,7 @@ pub mod panic; pub mod path; pub mod sync; pub mod time; +pub mod heap; // Platform-abstraction modules #[macro_use] From 03d5b453b9989c4b83b1afdc96969552a39ada07 Mon Sep 17 00:00:00 2001 From: Fenrir Date: Sun, 9 Jul 2017 20:21:38 -0600 Subject: [PATCH 6/9] update collections module also remove deprecated `collections` crate dependency --- ctr-std/src/collections/hash/map.rs | 586 +++++++++++++++++++++----- ctr-std/src/collections/hash/set.rs | 233 ++++++++-- ctr-std/src/collections/hash/table.rs | 535 ++++++++++++++--------- ctr-std/src/collections/mod.rs | 74 ++-- ctr-std/src/lib.rs | 23 +- ctr-std/src/sys/unix/alloc.rs | 100 ----- ctr-std/src/sys/unix/mod.rs | 1 - ctr-std/src/sys_common/mod.rs | 4 - 8 files changed, 1062 insertions(+), 494 deletions(-) delete mode 100644 ctr-std/src/sys/unix/alloc.rs diff --git a/ctr-std/src/collections/hash/map.rs b/ctr-std/src/collections/hash/map.rs index 0b310eb..746e180 100644 --- a/ctr-std/src/collections/hash/map.rs +++ b/ctr-std/src/collections/hash/map.rs @@ -19,8 +19,9 @@ use fmt::{self, Debug}; use hash::{Hash, Hasher, BuildHasher, SipHasher13}; use iter::{FromIterator, FusedIterator}; use mem::{self, replace}; -use ops::{Deref, Index}; +use ops::{Deref, Index, InPlace, Place, Placer}; use rand::{self, Rng}; +use ptr; use super::table::{self, Bucket, EmptyBucket, FullBucket, FullBucketMut, RawTable, SafeHash}; use super::table::BucketState::{Empty, Full}; @@ -177,16 +178,51 @@ impl DefaultResizePolicy { // element. // // FIXME(Gankro, pczarn): review the proof and put it all in a separate README.md +// +// Adaptive early resizing +// ---------------------- +// To protect against degenerate performance scenarios (including DOS attacks), +// the implementation includes an adaptive behavior that can resize the map +// early (before its capacity is exceeded) when suspiciously long probe sequences +// are encountered. +// +// With this algorithm in place it would be possible to turn a CPU attack into +// a memory attack due to the aggressive resizing. To prevent that the +// adaptive behavior only triggers when the map is at least half full. +// This reduces the effectiveness of the algorithm but also makes it completely safe. +// +// The previous safety measure also prevents degenerate interactions with +// really bad quality hash algorithms that can make normal inputs look like a +// DOS attack. +// +const DISPLACEMENT_THRESHOLD: usize = 128; +// +// The threshold of 128 is chosen to minimize the chance of exceeding it. +// In particular, we want that chance to be less than 10^-8 with a load of 90%. +// For displacement, the smallest constant that fits our needs is 90, +// so we round that up to 128. +// +// At a load factor of α, the odds of finding the target bucket after exactly n +// unsuccesful probes[1] are +// +// Pr_α{displacement = n} = +// (1 - α) / α * ∑_{k≥1} e^(-kα) * (kα)^(k+n) / (k + n)! * (1 - kα / (k + n + 1)) +// +// We use this formula to find the probability of triggering the adaptive behavior +// +// Pr_0.909{displacement > 128} = 1.601 * 10^-11 +// +// 1. Alfredo Viola (2005). Distributional analysis of Robin Hood linear probing +// hashing with buckets. -/// A hash map implementation which uses linear probing with Robin Hood bucket -/// stealing. +/// A hash map implemented with linear probing and Robin Hood bucket stealing. /// /// By default, `HashMap` uses a hashing algorithm selected to provide /// resistance against HashDoS attacks. The algorithm is randomly seeded, and a /// reasonable best-effort is made to generate this seed from a high quality, /// secure source of randomness provided by the host without blocking the -/// program. Because of this, the randomness of the seed is dependant on the -/// quality of the system's random number generator at the time it is created. +/// program. Because of this, the randomness of the seed depends on the output +/// quality of the system's random number generator when the seed is created. /// In particular, seeds generated when the system's entropy pool is abnormally /// low such as during system boot may be of a lower quality. /// @@ -198,9 +234,8 @@ impl DefaultResizePolicy { /// attacks such as HashDoS. /// /// The hashing algorithm can be replaced on a per-`HashMap` basis using the -/// `HashMap::default`, `HashMap::with_hasher`, and -/// `HashMap::with_capacity_and_hasher` methods. Many alternative algorithms -/// are available on crates.io, such as the `fnv` crate. +/// [`default`], [`with_hasher`], and [`with_capacity_and_hasher`] methods. Many +/// alternative algorithms are available on crates.io, such as the [`fnv`] crate. /// /// It is required that the keys implement the [`Eq`] and [`Hash`] traits, although /// this can frequently be achieved by using `#[derive(PartialEq, Eq, Hash)]`. @@ -302,6 +337,10 @@ impl DefaultResizePolicy { /// [`PartialEq`]: ../../std/cmp/trait.PartialEq.html /// [`RefCell`]: ../../std/cell/struct.RefCell.html /// [`Cell`]: ../../std/cell/struct.Cell.html +/// [`default`]: #method.default +/// [`with_hasher`]: #method.with_hasher +/// [`with_capacity_and_hasher`]: #method.with_capacity_and_hasher +/// [`fnv`]: https://crates.io/crates/fnv /// /// ``` /// use std::collections::HashMap; @@ -332,7 +371,7 @@ impl DefaultResizePolicy { /// } /// ``` /// -/// A HashMap with fixed list of elements can be initialized from an array: +/// A `HashMap` with fixed list of elements can be initialized from an array: /// /// ``` /// use std::collections::HashMap; @@ -381,7 +420,7 @@ fn search_hashed(table: M, hash: SafeHash, mut is_match: F) -> Inter // Found a hole! return InternalEntry::Vacant { hash: hash, - elem: NoElem(bucket), + elem: NoElem(bucket, displacement), }; } Full(bucket) => bucket, @@ -412,42 +451,46 @@ fn search_hashed(table: M, hash: SafeHash, mut is_match: F) -> Inter } } -fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { +fn pop_internal(starting_bucket: FullBucketMut) + -> (K, V, &mut RawTable) +{ let (empty, retkey, retval) = starting_bucket.take(); let mut gap = match empty.gap_peek() { - Some(b) => b, - None => return (retkey, retval), + Ok(b) => b, + Err(b) => return (retkey, retval, b.into_table()), }; while gap.full().displacement() != 0 { gap = match gap.shift() { - Some(b) => b, - None => break, + Ok(b) => b, + Err(b) => { + return (retkey, retval, b.into_table()); + }, }; } // Now we've done all our shifting. Return the value we grabbed earlier. - (retkey, retval) + (retkey, retval, gap.into_table()) } /// Perform robin hood bucket stealing at the given `bucket`. You must /// also pass that bucket's displacement so we don't have to recalculate it. /// -/// `hash`, `k`, and `v` are the elements to "robin hood" into the hashtable. +/// `hash`, `key`, and `val` are the elements to "robin hood" into the hashtable. fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, mut displacement: usize, mut hash: SafeHash, mut key: K, mut val: V) - -> &'a mut V { - let starting_index = bucket.index(); + -> FullBucketMut<'a, K, V> { let size = bucket.table().size(); - // Save the *starting point*. - let mut bucket = bucket.stash(); + let raw_capacity = bucket.table().capacity(); // There can be at most `size - dib` buckets to displace, because // in the worst case, there are `size` elements and we already are // `displacement` buckets away from the initial one. - let idx_end = starting_index + size - bucket.displacement(); + let idx_end = (bucket.index() + size - bucket.displacement()) % raw_capacity; + // Save the *starting point*. + let mut bucket = bucket.stash(); loop { let (old_hash, old_key, old_val) = bucket.replace(hash, key, val); @@ -471,7 +514,7 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, // bucket, which is a FullBucket on top of a // FullBucketMut, into just one FullBucketMut. The "table" // refers to the inner FullBucketMut in this context. - return bucket.into_table().into_mut_refs().1; + return bucket.into_table(); } Full(bucket) => bucket, }; @@ -523,11 +566,8 @@ impl HashMap // The caller should ensure that invariants by Robin Hood Hashing hold // and that there's space in the underlying table. fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) { - let raw_cap = self.raw_capacity(); let mut buckets = Bucket::new(&mut self.table, hash); - // note that buckets.index() keeps increasing - // even if the pointer wraps back to the first bucket. - let limit_bucket = buckets.index() + raw_cap; + let start_index = buckets.index(); loop { // We don't need to compare hashes for value swap. @@ -540,7 +580,7 @@ impl HashMap Full(b) => b.into_bucket(), }; buckets.next(); - debug_assert!(buckets.index() < limit_bucket); + debug_assert!(buckets.index() != start_index); } } } @@ -612,12 +652,13 @@ impl HashMap } } - /// Creates an empty `HashMap` with the specified capacity, using `hasher` + /// Creates an empty `HashMap` with the specified capacity, using `hash_builder` /// to hash the keys. /// /// The hash map will be able to hold at least `capacity` elements without /// reallocating. If `capacity` is 0, the hash map will not allocate. - /// Warning: `hasher` is normally randomly generated, and + /// + /// Warning: `hash_builder` is normally randomly generated, and /// is designed to allow HashMaps to be resistant to attacks that /// cause many collisions and very poor performance. Setting it /// manually using this function can expose a DoS attack vector. @@ -644,7 +685,9 @@ impl HashMap } } - /// Returns a reference to the map's hasher. + /// Returns a reference to the map's [`BuildHasher`]. + /// + /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html #[stable(feature = "hashmap_public_hasher", since = "1.9.0")] pub fn hasher(&self) -> &S { &self.hash_builder @@ -680,7 +723,9 @@ impl HashMap /// /// # Panics /// - /// Panics if the new allocation size overflows `usize`. + /// Panics if the new allocation size overflows [`usize`]. + /// + /// [`usize`]: ../../std/primitive.usize.html /// /// # Examples /// @@ -696,6 +741,11 @@ impl HashMap let min_cap = self.len().checked_add(additional).expect("reserve overflow"); let raw_cap = self.resize_policy.raw_capacity(min_cap); self.resize(raw_cap); + } else if self.table.tag() && remaining <= self.len() { + // Probe sequence is too long and table is half full, + // resize early to reduce probing length. + let new_capacity = self.table.capacity() * 2; + self.resize(new_capacity); } } @@ -704,6 +754,8 @@ impl HashMap /// 1) Ensure `new_raw_cap` is enough for all the elements, accounting /// for the load factor. /// 2) Ensure `new_raw_cap` is a power of two or zero. + #[inline(never)] + #[cold] fn resize(&mut self, new_raw_cap: usize) { assert!(self.table.size() <= new_raw_cap); assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0); @@ -711,42 +763,11 @@ impl HashMap let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap)); let old_size = old_table.size(); - if old_table.capacity() == 0 || old_table.size() == 0 { + if old_table.size() == 0 { return; } - // Grow the table. - // Specialization of the other branch. - let mut bucket = Bucket::first(&mut old_table); - - // "So a few of the first shall be last: for many be called, - // but few chosen." - // - // We'll most likely encounter a few buckets at the beginning that - // have their initial buckets near the end of the table. They were - // placed at the beginning as the probe wrapped around the table - // during insertion. We must skip forward to a bucket that won't - // get reinserted too early and won't unfairly steal others spot. - // This eliminates the need for robin hood. - loop { - bucket = match bucket.peek() { - Full(full) => { - if full.displacement() == 0 { - // This bucket occupies its ideal spot. - // It indicates the start of another "cluster". - bucket = full.into_bucket(); - break; - } - // Leaving this bucket in the last cluster for later. - full.into_bucket() - } - Empty(b) => { - // Encountered a hole between clusters. - b.into_bucket() - } - }; - bucket.next(); - } + let mut bucket = Bucket::head_bucket(&mut old_table); // This is how the buckets might be laid out in memory: // ($ marks an initialized bucket) @@ -831,7 +852,7 @@ impl HashMap } /// An iterator visiting all keys in arbitrary order. - /// Iterator element type is `&'a K`. + /// The iterator element type is `&'a K`. /// /// # Examples /// @@ -853,7 +874,7 @@ impl HashMap } /// An iterator visiting all values in arbitrary order. - /// Iterator element type is `&'a V`. + /// The iterator element type is `&'a V`. /// /// # Examples /// @@ -875,7 +896,7 @@ impl HashMap } /// An iterator visiting all values mutably in arbitrary order. - /// Iterator element type is `&'a mut V`. + /// The iterator element type is `&'a mut V`. /// /// # Examples /// @@ -902,7 +923,7 @@ impl HashMap } /// An iterator visiting all key-value pairs in arbitrary order. - /// Iterator element type is `(&'a K, &'a V)`. + /// The iterator element type is `(&'a K, &'a V)`. /// /// # Examples /// @@ -925,7 +946,7 @@ impl HashMap /// An iterator visiting all key-value pairs in arbitrary order, /// with mutable references to the values. - /// Iterator element type is `(&'a K, &'a mut V)`. + /// The iterator element type is `(&'a K, &'a mut V)`. /// /// # Examples /// @@ -974,7 +995,9 @@ impl HashMap pub fn entry(&mut self, key: K) -> Entry { // Gotta resize now. self.reserve(1); - self.search_mut(&key).into_entry(key).expect("unreachable") + let hash = self.make_hash(&key); + search_hashed(&mut self.table, hash, |q| q.eq(&key)) + .into_entry(key).expect("unreachable") } /// Returns the number of elements in the map. @@ -1141,13 +1164,14 @@ impl HashMap /// Inserts a key-value pair into the map. /// - /// If the map did not have this key present, `None` is returned. + /// If the map did not have this key present, [`None`] is returned. /// /// If the map did have this key present, the value is updated, and the old /// value is returned. The key is not updated, though; this matters for /// types that can be `==` without being identical. See the [module-level /// documentation] for more. /// + /// [`None`]: ../../std/option/enum.Option.html#variant.None /// [module-level documentation]: index.html#insert-and-complex-keys /// /// # Examples @@ -1201,6 +1225,55 @@ impl HashMap self.search_mut(k).into_occupied_bucket().map(|bucket| pop_internal(bucket).1) } + + /// Retains only the elements specified by the predicate. + /// + /// In other words, remove all pairs `(k, v)` such that `f(&k,&mut v)` returns `false`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap = (0..8).map(|x|(x, x*10)).collect(); + /// map.retain(|&k, _| k % 2 == 0); + /// assert_eq!(map.len(), 4); + /// ``` + #[stable(feature = "retain_hash_collection", since = "1.18.0")] + pub fn retain(&mut self, mut f: F) + where F: FnMut(&K, &mut V) -> bool + { + if self.table.size() == 0 { + return; + } + let mut elems_left = self.table.size(); + let mut bucket = Bucket::head_bucket(&mut self.table); + bucket.prev(); + let start_index = bucket.index(); + while elems_left != 0 { + bucket = match bucket.peek() { + Full(mut full) => { + elems_left -= 1; + let should_remove = { + let (k, v) = full.read_mut(); + !f(k, v) + }; + if should_remove { + let prev_raw = full.raw(); + let (_, _, t) = pop_internal(full); + Bucket::new_from(prev_raw, t) + } else { + full.into_bucket() + } + }, + Empty(b) => { + b.into_bucket() + } + }; + bucket.prev(); // reverse iteration + debug_assert!(elems_left == 0 || bucket.index() != start_index); + } + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -1262,7 +1335,13 @@ impl<'a, K, Q: ?Sized, V, S> Index<&'a Q> for HashMap } } -/// HashMap iterator. +/// An iterator over the entries of a `HashMap`. +/// +/// This `struct` is created by the [`iter`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`iter`]: struct.HashMap.html#method.iter +/// [`HashMap`]: struct.HashMap.html #[stable(feature = "rust1", since = "1.0.0")] pub struct Iter<'a, K: 'a, V: 'a> { inner: table::Iter<'a, K, V>, @@ -1276,19 +1355,46 @@ impl<'a, K, V> Clone for Iter<'a, K, V> { } } -/// HashMap mutable values iterator. +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, K: Debug, V: Debug> fmt::Debug for Iter<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list() + .entries(self.clone()) + .finish() + } +} + +/// A mutable iterator over the entries of a `HashMap`. +/// +/// This `struct` is created by the [`iter_mut`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`iter_mut`]: struct.HashMap.html#method.iter_mut +/// [`HashMap`]: struct.HashMap.html #[stable(feature = "rust1", since = "1.0.0")] pub struct IterMut<'a, K: 'a, V: 'a> { inner: table::IterMut<'a, K, V>, } -/// HashMap move iterator. +/// An owning iterator over the entries of a `HashMap`. +/// +/// This `struct` is created by the [`into_iter`] method on [`HashMap`][`HashMap`] +/// (provided by the `IntoIterator` trait). See its documentation for more. +/// +/// [`into_iter`]: struct.HashMap.html#method.into_iter +/// [`HashMap`]: struct.HashMap.html #[stable(feature = "rust1", since = "1.0.0")] pub struct IntoIter { - inner: table::IntoIter, + pub(super) inner: table::IntoIter, } -/// HashMap keys iterator. +/// An iterator over the keys of a `HashMap`. +/// +/// This `struct` is created by the [`keys`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`keys`]: struct.HashMap.html#method.keys +/// [`HashMap`]: struct.HashMap.html #[stable(feature = "rust1", since = "1.0.0")] pub struct Keys<'a, K: 'a, V: 'a> { inner: Iter<'a, K, V>, @@ -1302,7 +1408,22 @@ impl<'a, K, V> Clone for Keys<'a, K, V> { } } -/// HashMap values iterator. +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, K: Debug, V> fmt::Debug for Keys<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list() + .entries(self.clone()) + .finish() + } +} + +/// An iterator over the values of a `HashMap`. +/// +/// This `struct` is created by the [`values`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`values`]: struct.HashMap.html#method.values +/// [`HashMap`]: struct.HashMap.html #[stable(feature = "rust1", since = "1.0.0")] pub struct Values<'a, K: 'a, V: 'a> { inner: Iter<'a, K, V>, @@ -1316,13 +1437,34 @@ impl<'a, K, V> Clone for Values<'a, K, V> { } } -/// HashMap drain iterator. +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, K, V: Debug> fmt::Debug for Values<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list() + .entries(self.clone()) + .finish() + } +} + +/// A draining iterator over the entries of a `HashMap`. +/// +/// This `struct` is created by the [`drain`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`drain`]: struct.HashMap.html#method.drain +/// [`HashMap`]: struct.HashMap.html #[stable(feature = "drain", since = "1.6.0")] pub struct Drain<'a, K: 'a, V: 'a> { - inner: table::Drain<'a, K, V>, + pub(super) inner: table::Drain<'a, K, V>, } -/// Mutable HashMap values iterator. +/// A mutable iterator over the values of a `HashMap`. +/// +/// This `struct` is created by the [`values_mut`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`values_mut`]: struct.HashMap.html#method.values_mut +/// [`HashMap`]: struct.HashMap.html #[stable(feature = "map_values_mut", since = "1.10.0")] pub struct ValuesMut<'a, K: 'a, V: 'a> { inner: IterMut<'a, K, V>, @@ -1369,19 +1511,20 @@ impl<'a, K, V> InternalEntry> { } } -/// A view into a single location in a map, which may be vacant or occupied. -/// This enum is constructed from the [`entry`] method on [`HashMap`]. +/// A view into a single entry in a map, which may either be vacant or occupied. +/// +/// This `enum` is constructed from the [`entry`] method on [`HashMap`]. /// /// [`HashMap`]: struct.HashMap.html /// [`entry`]: struct.HashMap.html#method.entry #[stable(feature = "rust1", since = "1.0.0")] pub enum Entry<'a, K: 'a, V: 'a> { - /// An occupied Entry. + /// An occupied entry. #[stable(feature = "rust1", since = "1.0.0")] Occupied(#[stable(feature = "rust1", since = "1.0.0")] OccupiedEntry<'a, K, V>), - /// A vacant Entry. + /// A vacant entry. #[stable(feature = "rust1", since = "1.0.0")] Vacant(#[stable(feature = "rust1", since = "1.0.0")] VacantEntry<'a, K, V>), @@ -1405,7 +1548,7 @@ impl<'a, K: 'a + Debug, V: 'a + Debug> Debug for Entry<'a, K, V> { } } -/// A view into a single occupied location in a HashMap. +/// A view into an occupied entry in a `HashMap`. /// It is part of the [`Entry`] enum. /// /// [`Entry`]: enum.Entry.html @@ -1425,7 +1568,7 @@ impl<'a, K: 'a + Debug, V: 'a + Debug> Debug for OccupiedEntry<'a, K, V> { } } -/// A view into a single empty location in a HashMap. +/// A view into a vacant entry in a `HashMap`. /// It is part of the [`Entry`] enum. /// /// [`Entry`]: enum.Entry.html @@ -1451,7 +1594,7 @@ enum VacantEntryState { /// and will kick the current one out on insertion. NeqElem(FullBucket, usize), /// The index is genuinely vacant. - NoElem(EmptyBucket), + NoElem(EmptyBucket, usize), } #[stable(feature = "rust1", since = "1.0.0")] @@ -1557,6 +1700,18 @@ impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { #[unstable(feature = "fused", issue = "35602")] impl<'a, K, V> FusedIterator for IterMut<'a, K, V> {} +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, K, V> fmt::Debug for IterMut<'a, K, V> + where K: fmt::Debug, + V: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list() + .entries(self.inner.iter()) + .finish() + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for IntoIter { type Item = (K, V); @@ -1580,6 +1735,15 @@ impl ExactSizeIterator for IntoIter { #[unstable(feature = "fused", issue = "35602")] impl FusedIterator for IntoIter {} +#[stable(feature = "std_debug", since = "1.16.0")] +impl fmt::Debug for IntoIter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list() + .entries(self.inner.iter()) + .finish() + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> Iterator for Keys<'a, K, V> { type Item = &'a K; @@ -1649,6 +1813,18 @@ impl<'a, K, V> ExactSizeIterator for ValuesMut<'a, K, V> { #[unstable(feature = "fused", issue = "35602")] impl<'a, K, V> FusedIterator for ValuesMut<'a, K, V> {} +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, K, V> fmt::Debug for ValuesMut<'a, K, V> + where K: fmt::Debug, + V: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list() + .entries(self.inner.inner.iter()) + .finish() + } +} + #[stable(feature = "drain", since = "1.6.0")] impl<'a, K, V> Iterator for Drain<'a, K, V> { type Item = (K, V); @@ -1672,6 +1848,92 @@ impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { #[unstable(feature = "fused", issue = "35602")] impl<'a, K, V> FusedIterator for Drain<'a, K, V> {} +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, K, V> fmt::Debug for Drain<'a, K, V> + where K: fmt::Debug, + V: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list() + .entries(self.inner.iter()) + .finish() + } +} + +/// A place for insertion to a `Entry`. +/// +/// See [`HashMap::entry`](struct.HashMap.html#method.entry) for details. +#[must_use = "places do nothing unless written to with `<-` syntax"] +#[unstable(feature = "collection_placement", + reason = "struct name and placement protocol is subject to change", + issue = "30172")] +pub struct EntryPlace<'a, K: 'a, V: 'a> { + bucket: FullBucketMut<'a, K, V>, +} + +#[unstable(feature = "collection_placement", + reason = "struct name and placement protocol is subject to change", + issue = "30172")] +impl<'a, K: 'a + Debug, V: 'a + Debug> Debug for EntryPlace<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("EntryPlace") + .field("key", self.bucket.read().0) + .field("value", self.bucket.read().1) + .finish() + } +} + +#[unstable(feature = "collection_placement", + reason = "struct name and placement protocol is subject to change", + issue = "30172")] +impl<'a, K, V> Drop for EntryPlace<'a, K, V> { + fn drop(&mut self) { + // Inplacement insertion failed. Only key need to drop. + // The value is failed to insert into map. + unsafe { self.bucket.remove_key() }; + } +} + +#[unstable(feature = "collection_placement", + reason = "placement protocol is subject to change", + issue = "30172")] +impl<'a, K, V> Placer for Entry<'a, K, V> { + type Place = EntryPlace<'a, K, V>; + + fn make_place(self) -> EntryPlace<'a, K, V> { + let b = match self { + Occupied(mut o) => { + unsafe { ptr::drop_in_place(o.elem.read_mut().1); } + o.elem + } + Vacant(v) => { + unsafe { v.insert_key() } + } + }; + EntryPlace { bucket: b } + } +} + +#[unstable(feature = "collection_placement", + reason = "placement protocol is subject to change", + issue = "30172")] +impl<'a, K, V> Place for EntryPlace<'a, K, V> { + fn pointer(&mut self) -> *mut V { + self.bucket.read_mut().1 + } +} + +#[unstable(feature = "collection_placement", + reason = "placement protocol is subject to change", + issue = "30172")] +impl<'a, K, V> InPlace for EntryPlace<'a, K, V> { + type Owner = (); + + unsafe fn finalize(self) { + mem::forget(self); + } +} + impl<'a, K, V> Entry<'a, K, V> { #[stable(feature = "rust1", since = "1.0.0")] /// Ensures a value is in the entry by inserting the default if empty, and returns @@ -1707,11 +1969,11 @@ impl<'a, K, V> Entry<'a, K, V> { /// use std::collections::HashMap; /// /// let mut map: HashMap<&str, String> = HashMap::new(); - /// let s = "hoho".to_owned(); + /// let s = "hoho".to_string(); /// /// map.entry("poneyland").or_insert_with(|| s); /// - /// assert_eq!(map["poneyland"], "hoho".to_owned()); + /// assert_eq!(map["poneyland"], "hoho".to_string()); /// ``` pub fn or_insert_with V>(self, default: F) -> &'a mut V { match self { @@ -1756,13 +2018,6 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { self.elem.read().0 } - /// Deprecated, renamed to `remove_entry` - #[unstable(feature = "map_entry_recover_keys", issue = "34285")] - #[rustc_deprecated(since = "1.12.0", reason = "renamed to `remove_entry`")] - pub fn remove_pair(self) -> (K, V) { - self.remove_entry() - } - /// Take the ownership of the key and value from the map. /// /// # Examples @@ -1783,7 +2038,8 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// ``` #[stable(feature = "map_entry_recover_keys2", since = "1.12.0")] pub fn remove_entry(self) -> (K, V) { - pop_internal(self.elem) + let (k, v, _) = pop_internal(self.elem); + (k, v) } /// Gets a reference to the value in the entry. @@ -1961,9 +2217,40 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(self, value: V) -> &'a mut V { + let b = match self.elem { + NeqElem(mut bucket, disp) => { + if disp >= DISPLACEMENT_THRESHOLD { + bucket.table_mut().set_tag(true); + } + robin_hood(bucket, disp, self.hash, self.key, value) + }, + NoElem(mut bucket, disp) => { + if disp >= DISPLACEMENT_THRESHOLD { + bucket.table_mut().set_tag(true); + } + bucket.put(self.hash, self.key, value) + }, + }; + b.into_mut_refs().1 + } + + // Only used for InPlacement insert. Avoid unnecessary value copy. + // The value remains uninitialized. + unsafe fn insert_key(self) -> FullBucketMut<'a, K, V> { match self.elem { - NeqElem(bucket, disp) => robin_hood(bucket, disp, self.hash, self.key, value), - NoElem(bucket) => bucket.put(self.hash, self.key, value).into_mut_refs().1, + NeqElem(mut bucket, disp) => { + if disp >= DISPLACEMENT_THRESHOLD { + bucket.table_mut().set_tag(true); + } + let uninit = mem::uninitialized(); + robin_hood(bucket, disp, self.hash, self.key, uninit) + }, + NoElem(mut bucket, disp) => { + if disp >= DISPLACEMENT_THRESHOLD { + bucket.table_mut().set_tag(true); + } + bucket.put_key(self.hash, self.key) + }, } } } @@ -2099,7 +2386,7 @@ impl BuildHasher for RandomState { /// [`Hasher`]: ../../hash/trait.Hasher.html #[stable(feature = "hashmap_default_hasher", since = "1.13.0")] #[allow(deprecated)] -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct DefaultHasher(SipHasher13); impl DefaultHasher { @@ -2117,10 +2404,9 @@ impl DefaultHasher { #[stable(feature = "hashmap_default_hasher", since = "1.13.0")] impl Default for DefaultHasher { - /// Creates a new `DefaultHasher` using [`DefaultHasher::new`]. See - /// [`DefaultHasher::new`] documentation for more information. + /// Creates a new `DefaultHasher` using [`new`]. See its documentation for more. /// - /// [`DefaultHasher::new`]: #method.new + /// [`new`]: #method.new fn default() -> DefaultHasher { DefaultHasher::new() } @@ -2148,6 +2434,13 @@ impl Default for RandomState { } } +#[stable(feature = "std_debug", since = "1.16.0")] +impl fmt::Debug for RandomState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.pad("RandomState { .. }") + } +} + impl super::Recover for HashMap where K: Eq + Hash + Borrow, S: BuildHasher, @@ -2228,6 +2521,7 @@ mod test_map { use super::RandomState; use cell::RefCell; use rand::{thread_rng, Rng}; + use panic; #[test] fn test_zero_capacities() { @@ -3070,4 +3364,88 @@ mod test_map { assert_eq!(a.len(), 1); assert_eq!(a[key], value); } + + #[test] + fn test_retain() { + let mut map: HashMap = (0..100).map(|x|(x, x*10)).collect(); + + map.retain(|&k, _| k % 2 == 0); + assert_eq!(map.len(), 50); + assert_eq!(map[&2], 20); + assert_eq!(map[&4], 40); + assert_eq!(map[&6], 60); + } + + #[test] + fn test_adaptive() { + const TEST_LEN: usize = 5000; + // by cloning we get maps with the same hasher seed + let mut first = HashMap::new(); + let mut second = first.clone(); + first.extend((0..TEST_LEN).map(|i| (i, i))); + second.extend((TEST_LEN..TEST_LEN * 2).map(|i| (i, i))); + + for (&k, &v) in &second { + let prev_cap = first.capacity(); + let expect_grow = first.len() == prev_cap; + first.insert(k, v); + if !expect_grow && first.capacity() != prev_cap { + return; + } + } + panic!("Adaptive early resize failed"); + } + + #[test] + fn test_placement_in() { + let mut map = HashMap::new(); + map.extend((0..10).map(|i| (i, i))); + + map.entry(100) <- 100; + assert_eq!(map[&100], 100); + + map.entry(0) <- 10; + assert_eq!(map[&0], 10); + + assert_eq!(map.len(), 11); + } + + #[test] + fn test_placement_panic() { + let mut map = HashMap::new(); + map.extend((0..10).map(|i| (i, i))); + + fn mkpanic() -> usize { panic!() } + + // modify existing key + // when panic happens, previous key is removed. + let _ = panic::catch_unwind(panic::AssertUnwindSafe(|| { map.entry(0) <- mkpanic(); })); + assert_eq!(map.len(), 9); + assert!(!map.contains_key(&0)); + + // add new key + let _ = panic::catch_unwind(panic::AssertUnwindSafe(|| { map.entry(100) <- mkpanic(); })); + assert_eq!(map.len(), 9); + assert!(!map.contains_key(&100)); + } + + #[test] + fn test_placement_drop() { + // correctly drop + struct TestV<'a>(&'a mut bool); + impl<'a> Drop for TestV<'a> { + fn drop(&mut self) { + if !*self.0 { panic!("value double drop!"); } // no double drop + *self.0 = false; + } + } + + fn makepanic<'a>() -> TestV<'a> { panic!() } + + let mut can_drop = true; + let mut hm = HashMap::new(); + hm.insert(0, TestV(&mut can_drop)); + let _ = panic::catch_unwind(panic::AssertUnwindSafe(|| { hm.entry(0) <- makepanic(); })); + assert_eq!(hm.len(), 0); + } } diff --git a/ctr-std/src/collections/hash/set.rs b/ctr-std/src/collections/hash/set.rs index 72af612..d80df5f 100644 --- a/ctr-std/src/collections/hash/set.rs +++ b/ctr-std/src/collections/hash/set.rs @@ -24,11 +24,10 @@ use super::map::{self, HashMap, Keys, RandomState}; // for `bucket.val` in the case of HashSet. I suppose we would need HKT // to get rid of it properly. -/// An implementation of a hash set using the underlying representation of a -/// HashMap where the value is (). +/// A hash set implemented as a `HashMap` where the value is `()`. /// -/// As with the `HashMap` type, a `HashSet` requires that the elements -/// implement the `Eq` and `Hash` traits. This can frequently be achieved by +/// As with the [`HashMap`] type, a `HashSet` requires that the elements +/// implement the [`Eq`] and [`Hash`] traits. This can frequently be achieved by /// using `#[derive(PartialEq, Eq, Hash)]`. If you implement these yourself, /// it is important that the following property holds: /// @@ -40,9 +39,9 @@ use super::map::{self, HashMap, Keys, RandomState}; /// /// /// It is a logic error for an item to be modified in such a way that the -/// item's hash, as determined by the `Hash` trait, or its equality, as -/// determined by the `Eq` trait, changes while it is in the set. This is -/// normally only possible through `Cell`, `RefCell`, global state, I/O, or +/// item's hash, as determined by the [`Hash`] trait, or its equality, as +/// determined by the [`Eq`] trait, changes while it is in the set. This is +/// normally only possible through [`Cell`], [`RefCell`], global state, I/O, or /// unsafe code. /// /// # Examples @@ -75,8 +74,8 @@ use super::map::{self, HashMap, Keys, RandomState}; /// ``` /// /// The easiest way to use `HashSet` with a custom type is to derive -/// `Eq` and `Hash`. We must also derive `PartialEq`, this will in the -/// future be implied by `Eq`. +/// [`Eq`] and [`Hash`]. We must also derive [`PartialEq`], this will in the +/// future be implied by [`Eq`]. /// /// ``` /// use std::collections::HashSet; @@ -99,7 +98,7 @@ use super::map::{self, HashMap, Keys, RandomState}; /// } /// ``` /// -/// HashSet with fixed list of elements can be initialized from an array: +/// A `HashSet` with fixed list of elements can be initialized from an array: /// /// ``` /// use std::collections::HashSet; @@ -110,8 +109,13 @@ use super::map::{self, HashMap, Keys, RandomState}; /// // use the values stored in the set /// } /// ``` - - +/// +/// [`Cell`]: ../../std/cell/struct.Cell.html +/// [`Eq`]: ../../std/cmp/trait.Eq.html +/// [`Hash`]: ../../std/hash/trait.Hash.html +/// [`HashMap`]: struct.HashMap.html +/// [`PartialEq`]: ../../std/cmp/trait.PartialEq.html +/// [`RefCell`]: ../../std/cell/struct.RefCell.html #[derive(Clone)] #[stable(feature = "rust1", since = "1.0.0")] pub struct HashSet { @@ -181,7 +185,7 @@ impl HashSet HashSet { map: HashMap::with_hasher(hasher) } } - /// Creates an empty HashSet with with the specified capacity, using + /// Creates an empty `HashSet` with with the specified capacity, using /// `hasher` to hash the keys. /// /// The hash set will be able to hold at least `capacity` elements without @@ -208,7 +212,9 @@ impl HashSet HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) } } - /// Returns a reference to the set's hasher. + /// Returns a reference to the set's [`BuildHasher`]. + /// + /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html #[stable(feature = "hashmap_public_hasher", since = "1.9.0")] pub fn hasher(&self) -> &S { self.map.hasher() @@ -271,7 +277,7 @@ impl HashSet } /// An iterator visiting all elements in arbitrary order. - /// Iterator element type is &'a T. + /// The iterator element type is `&'a T`. /// /// # Examples /// @@ -291,7 +297,8 @@ impl HashSet Iter { iter: self.map.keys() } } - /// Visit the values representing the difference. + /// Visits the values representing the difference, + /// i.e. the values that are in `self` but not in `other`. /// /// # Examples /// @@ -321,7 +328,8 @@ impl HashSet } } - /// Visit the values representing the symmetric difference. + /// Visits the values representing the symmetric difference, + /// i.e. the values that are in `self` or in `other` but not in both. /// /// # Examples /// @@ -348,7 +356,8 @@ impl HashSet SymmetricDifference { iter: self.difference(other).chain(other.difference(self)) } } - /// Visit the values representing the intersection. + /// Visits the values representing the intersection, + /// i.e. the values that are both in `self` and `other`. /// /// # Examples /// @@ -373,7 +382,8 @@ impl HashSet } } - /// Visit the values representing the union. + /// Visits the values representing the union, + /// i.e. all the values in `self` or `other`, without duplicates. /// /// # Examples /// @@ -456,7 +466,7 @@ impl HashSet /// Returns `true` if the set contains a value. /// /// The value may be any borrowed form of the set's value type, but - /// `Hash` and `Eq` on the borrowed form *must* match those for + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for /// the value type. /// /// # Examples @@ -468,6 +478,9 @@ impl HashSet /// assert_eq!(set.contains(&1), true); /// assert_eq!(set.contains(&4), false); /// ``` + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html #[stable(feature = "rust1", since = "1.0.0")] pub fn contains(&self, value: &Q) -> bool where T: Borrow, @@ -479,8 +492,11 @@ impl HashSet /// Returns a reference to the value in the set, if any, that is equal to the given value. /// /// The value may be any borrowed form of the set's value type, but - /// `Hash` and `Eq` on the borrowed form *must* match those for + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for /// the value type. + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html #[stable(feature = "set_recovery", since = "1.9.0")] pub fn get(&self, value: &Q) -> Option<&T> where T: Borrow, @@ -489,7 +505,7 @@ impl HashSet Recover::get(&self.map, value) } - /// Returns `true` if the set has no elements in common with `other`. + /// Returns `true` if `self` has no elements in common with `other`. /// This is equivalent to checking for an empty intersection. /// /// # Examples @@ -511,7 +527,8 @@ impl HashSet self.iter().all(|v| !other.contains(v)) } - /// Returns `true` if the set is a subset of another. + /// Returns `true` if the set is a subset of another, + /// i.e. `other` contains at least all the values in `self`. /// /// # Examples /// @@ -532,7 +549,8 @@ impl HashSet self.iter().all(|v| other.contains(v)) } - /// Returns `true` if the set is a superset of another. + /// Returns `true` if the set is a superset of another, + /// i.e. `self` contains at least all the values in `other`. /// /// # Examples /// @@ -590,7 +608,7 @@ impl HashSet /// present in the set. /// /// The value may be any borrowed form of the set's value type, but - /// `Hash` and `Eq` on the borrowed form *must* match those for + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for /// the value type. /// /// # Examples @@ -604,6 +622,9 @@ impl HashSet /// assert_eq!(set.remove(&2), true); /// assert_eq!(set.remove(&2), false); /// ``` + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html #[stable(feature = "rust1", since = "1.0.0")] pub fn remove(&mut self, value: &Q) -> bool where T: Borrow, @@ -615,8 +636,11 @@ impl HashSet /// Removes and returns the value in the set, if any, that is equal to the given one. /// /// The value may be any borrowed form of the set's value type, but - /// `Hash` and `Eq` on the borrowed form *must* match those for + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for /// the value type. + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html #[stable(feature = "set_recovery", since = "1.9.0")] pub fn take(&mut self, value: &Q) -> Option where T: Borrow, @@ -624,6 +648,27 @@ impl HashSet { Recover::take(&mut self.map, value) } + + /// Retains only the elements specified by the predicate. + /// + /// In other words, remove all elements `e` such that `f(&e)` returns `false`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let xs = [1,2,3,4,5,6]; + /// let mut set: HashSet = xs.iter().cloned().collect(); + /// set.retain(|&k| k % 2 == 0); + /// assert_eq!(set.len(), 3); + /// ``` + #[stable(feature = "retain_hash_collection", since = "1.18.0")] + pub fn retain(&mut self, mut f: F) + where F: FnMut(&T) -> bool + { + self.map.retain(|k, _| f(k)); + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -828,25 +873,49 @@ impl<'a, 'b, T, S> Sub<&'b HashSet> for &'a HashSet } } -/// HashSet iterator +/// An iterator over the items of a `HashSet`. +/// +/// This `struct` is created by the [`iter`] method on [`HashSet`]. +/// See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`iter`]: struct.HashSet.html#method.iter #[stable(feature = "rust1", since = "1.0.0")] pub struct Iter<'a, K: 'a> { iter: Keys<'a, K, ()>, } -/// HashSet move iterator +/// An owning iterator over the items of a `HashSet`. +/// +/// This `struct` is created by the [`into_iter`] method on [`HashSet`][`HashSet`] +/// (provided by the `IntoIterator` trait). See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`into_iter`]: struct.HashSet.html#method.into_iter #[stable(feature = "rust1", since = "1.0.0")] pub struct IntoIter { iter: map::IntoIter, } -/// HashSet drain iterator +/// A draining iterator over the items of a `HashSet`. +/// +/// This `struct` is created by the [`drain`] method on [`HashSet`]. +/// See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`drain`]: struct.HashSet.html#method.drain #[stable(feature = "rust1", since = "1.0.0")] pub struct Drain<'a, K: 'a> { iter: map::Drain<'a, K, ()>, } -/// Intersection iterator +/// A lazy iterator producing elements in the intersection of `HashSet`s. +/// +/// This `struct` is created by the [`intersection`] method on [`HashSet`]. +/// See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`intersection`]: struct.HashSet.html#method.intersection #[stable(feature = "rust1", since = "1.0.0")] pub struct Intersection<'a, T: 'a, S: 'a> { // iterator of the first set @@ -855,7 +924,13 @@ pub struct Intersection<'a, T: 'a, S: 'a> { other: &'a HashSet, } -/// Difference iterator +/// A lazy iterator producing elements in the difference of `HashSet`s. +/// +/// This `struct` is created by the [`difference`] method on [`HashSet`]. +/// See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`difference`]: struct.HashSet.html#method.difference #[stable(feature = "rust1", since = "1.0.0")] pub struct Difference<'a, T: 'a, S: 'a> { // iterator of the first set @@ -864,13 +939,25 @@ pub struct Difference<'a, T: 'a, S: 'a> { other: &'a HashSet, } -/// Symmetric difference iterator. +/// A lazy iterator producing elements in the symmetric difference of `HashSet`s. +/// +/// This `struct` is created by the [`symmetric_difference`] method on +/// [`HashSet`]. See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`symmetric_difference`]: struct.HashSet.html#method.symmetric_difference #[stable(feature = "rust1", since = "1.0.0")] pub struct SymmetricDifference<'a, T: 'a, S: 'a> { iter: Chain, Difference<'a, T, S>>, } -/// Set union iterator. +/// A lazy iterator producing elements in the union of `HashSet`s. +/// +/// This `struct` is created by the [`union`] method on [`HashSet`]. +/// See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`union`]: struct.HashSet.html#method.union #[stable(feature = "rust1", since = "1.0.0")] pub struct Union<'a, T: 'a, S: 'a> { iter: Chain, Difference<'a, T, S>>, @@ -948,6 +1035,13 @@ impl<'a, K> ExactSizeIterator for Iter<'a, K> { #[unstable(feature = "fused", issue = "35602")] impl<'a, K> FusedIterator for Iter<'a, K> {} +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, K: fmt::Debug> fmt::Debug for Iter<'a, K> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for IntoIter { type Item = K; @@ -968,6 +1062,17 @@ impl ExactSizeIterator for IntoIter { #[unstable(feature = "fused", issue = "35602")] impl FusedIterator for IntoIter {} +#[stable(feature = "std_debug", since = "1.16.0")] +impl fmt::Debug for IntoIter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let entries_iter = self.iter + .inner + .iter() + .map(|(k, _)| k); + f.debug_list().entries(entries_iter).finish() + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K> Iterator for Drain<'a, K> { type Item = K; @@ -988,6 +1093,17 @@ impl<'a, K> ExactSizeIterator for Drain<'a, K> { #[unstable(feature = "fused", issue = "35602")] impl<'a, K> FusedIterator for Drain<'a, K> {} +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, K: fmt::Debug> fmt::Debug for Drain<'a, K> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let entries_iter = self.iter + .inner + .iter() + .map(|(k, _)| k); + f.debug_list().entries(entries_iter).finish() + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, T, S> Clone for Intersection<'a, T, S> { fn clone(&self) -> Intersection<'a, T, S> { @@ -1021,6 +1137,16 @@ impl<'a, T, S> Iterator for Intersection<'a, T, S> } } +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, T, S> fmt::Debug for Intersection<'a, T, S> + where T: fmt::Debug + Eq + Hash, + S: BuildHasher +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + #[unstable(feature = "fused", issue = "35602")] impl<'a, T, S> FusedIterator for Intersection<'a, T, S> where T: Eq + Hash, @@ -1068,6 +1194,16 @@ impl<'a, T, S> FusedIterator for Difference<'a, T, S> { } +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, T, S> fmt::Debug for Difference<'a, T, S> + where T: fmt::Debug + Eq + Hash, + S: BuildHasher +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, T, S> Clone for SymmetricDifference<'a, T, S> { fn clone(&self) -> SymmetricDifference<'a, T, S> { @@ -1097,6 +1233,16 @@ impl<'a, T, S> FusedIterator for SymmetricDifference<'a, T, S> { } +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, T, S> fmt::Debug for SymmetricDifference<'a, T, S> + where T: fmt::Debug + Eq + Hash, + S: BuildHasher +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, T, S> Clone for Union<'a, T, S> { fn clone(&self) -> Union<'a, T, S> { @@ -1111,6 +1257,16 @@ impl<'a, T, S> FusedIterator for Union<'a, T, S> { } +#[stable(feature = "std_debug", since = "1.16.0")] +impl<'a, T, S> fmt::Debug for Union<'a, T, S> + where T: fmt::Debug + Eq + Hash, + S: BuildHasher +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, T, S> Iterator for Union<'a, T, S> where T: Eq + Hash, @@ -1528,4 +1684,15 @@ mod test_set { assert!(a.contains(&5)); assert!(a.contains(&6)); } + + #[test] + fn test_retain() { + let xs = [1, 2, 3, 4, 5, 6]; + let mut set: HashSet = xs.iter().cloned().collect(); + set.retain(|&k| k % 2 == 0); + assert_eq!(set.len(), 3); + assert!(set.contains(&2)); + assert!(set.contains(&4)); + assert!(set.contains(&6)); + } } diff --git a/ctr-std/src/collections/hash/table.rs b/ctr-std/src/collections/hash/table.rs index bb0fd3c..06f4f76 100644 --- a/ctr-std/src/collections/hash/table.rs +++ b/ctr-std/src/collections/hash/table.rs @@ -8,16 +8,12 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![allow(deprecated)] - -use sys::alloc::Heap; -use alloc::allocator::{Alloc,Layout}; +use alloc::heap::{Heap, Alloc, Layout}; use cmp; use hash::{BuildHasher, Hash, Hasher}; -use intrinsics::needs_drop; use marker; -use mem::{align_of, size_of}; +use mem::{align_of, size_of, needs_drop}; use mem; use ops::{Deref, DerefMut}; use ptr::{self, Unique, Shared}; @@ -36,6 +32,44 @@ use self::BucketState::*; type HashUint = usize; const EMPTY_BUCKET: HashUint = 0; +const EMPTY: usize = 1; + +/// Special `Unique` that uses the lower bit of the pointer +/// to expose a boolean tag. +/// Note: when the pointer is initialized to EMPTY `.ptr()` will return +/// null and the tag functions shouldn't be used. +struct TaggedHashUintPtr(Unique); + +impl TaggedHashUintPtr { + #[inline] + unsafe fn new(ptr: *mut HashUint) -> Self { + debug_assert!(ptr as usize & 1 == 0 || ptr as usize == EMPTY as usize); + TaggedHashUintPtr(Unique::new(ptr)) + } + + #[inline] + fn set_tag(&mut self, value: bool) { + let mut usize_ptr = self.0.as_ptr() as usize; + unsafe { + if value { + usize_ptr |= 1; + } else { + usize_ptr &= !1; + } + self.0 = Unique::new(usize_ptr as *mut HashUint) + } + } + + #[inline] + fn tag(&self) -> bool { + (self.0.as_ptr() as usize) & 1 == 1 + } + + #[inline] + fn ptr(&self) -> *mut HashUint { + (self.0.as_ptr() as usize & !1) as *mut HashUint + } +} /// The raw hashtable, providing safe-ish access to the unzipped and highly /// optimized arrays of hashes, and key-value pairs. @@ -75,10 +109,14 @@ const EMPTY_BUCKET: HashUint = 0; /// around just the "table" part of the hashtable. It enforces some /// invariants at the type level and employs some performance trickery, /// but in general is just a tricked out `Vec>`. +/// +/// The hashtable also exposes a special boolean tag. The tag defaults to false +/// when the RawTable is created and is accessible with the `tag` and `set_tag` +/// functions. pub struct RawTable { - capacity: usize, + capacity_mask: usize, size: usize, - hashes: Unique, + hashes: TaggedHashUintPtr, // Because K/V do not appear directly in any of the types in the struct, // inform rustc that in fact instances of K and V are reachable from here. @@ -88,10 +126,13 @@ pub struct RawTable { unsafe impl Send for RawTable {} unsafe impl Sync for RawTable {} -struct RawBucket { - hash: *mut HashUint, +// An unsafe view of a RawTable bucket +// Valid indexes are within [0..table_capacity) +pub struct RawBucket { + hash_start: *mut HashUint, // We use *const to ensure covariance with respect to K and V - pair: *const (K, V), + pair_start: *const (K, V), + idx: usize, _marker: marker::PhantomData<(K, V)>, } @@ -104,7 +145,6 @@ impl Clone for RawBucket { pub struct Bucket { raw: RawBucket, - idx: usize, table: M, } @@ -117,13 +157,11 @@ impl Clone for Bucket { pub struct EmptyBucket { raw: RawBucket, - idx: usize, table: M, } pub struct FullBucket { raw: RawBucket, - idx: usize, table: M, } @@ -195,13 +233,17 @@ fn can_alias_safehash_as_hash() { assert_eq!(size_of::(), size_of::()) } +// RawBucket methods are unsafe as it's possible to +// make a RawBucket point to invalid memory using safe code. impl RawBucket { - unsafe fn offset(self, count: isize) -> RawBucket { - RawBucket { - hash: self.hash.offset(count), - pair: self.pair.offset(count), - _marker: marker::PhantomData, - } + unsafe fn hash(&self) -> *mut HashUint { + self.hash_start.offset(self.idx as isize) + } + unsafe fn pair(&self) -> *mut (K, V) { + self.pair_start.offset(self.idx as isize) as *mut (K, V) + } + unsafe fn hash_pair(&self) -> (*mut HashUint, *mut (K, V)) { + (self.hash(), self.pair()) } } @@ -211,13 +253,21 @@ impl FullBucket { pub fn table(&self) -> &M { &self.table } + /// Borrow a mutable reference to the table. + pub fn table_mut(&mut self) -> &mut M { + &mut self.table + } /// Move out the reference to the table. pub fn into_table(self) -> M { self.table } /// Get the raw index. pub fn index(&self) -> usize { - self.idx + self.raw.idx + } + /// Get the raw bucket. + pub fn raw(&self) -> RawBucket { + self.raw } } @@ -226,12 +276,20 @@ impl EmptyBucket { pub fn table(&self) -> &M { &self.table } + /// Borrow a mutable reference to the table. + pub fn table_mut(&mut self) -> &mut M { + &mut self.table + } } impl Bucket { /// Get the raw index. pub fn index(&self) -> usize { - self.idx + self.raw.idx + } + /// get the table. + pub fn into_table(self) -> M { + self.table } } @@ -278,63 +336,97 @@ impl>> Bucket { Bucket::at_index(table, hash.inspect() as usize) } + pub fn new_from(r: RawBucket, t: M) + -> Bucket + { + Bucket { + raw: r, + table: t, + } + } + pub fn at_index(table: M, ib_index: usize) -> Bucket { // if capacity is 0, then the RawBucket will be populated with bogus pointers. // This is an uncommon case though, so avoid it in release builds. debug_assert!(table.capacity() > 0, "Table should have capacity at this point"); - let ib_index = ib_index & (table.capacity() - 1); + let ib_index = ib_index & table.capacity_mask; Bucket { - raw: unsafe { table.first_bucket_raw().offset(ib_index as isize) }, - idx: ib_index, + raw: table.raw_bucket_at(ib_index), table: table, } } pub fn first(table: M) -> Bucket { Bucket { - raw: table.first_bucket_raw(), - idx: 0, + raw: table.raw_bucket_at(0), table: table, } } + // "So a few of the first shall be last: for many be called, + // but few chosen." + // + // We'll most likely encounter a few buckets at the beginning that + // have their initial buckets near the end of the table. They were + // placed at the beginning as the probe wrapped around the table + // during insertion. We must skip forward to a bucket that won't + // get reinserted too early and won't unfairly steal others spot. + // This eliminates the need for robin hood. + pub fn head_bucket(table: M) -> Bucket { + let mut bucket = Bucket::first(table); + + loop { + bucket = match bucket.peek() { + Full(full) => { + if full.displacement() == 0 { + // This bucket occupies its ideal spot. + // It indicates the start of another "cluster". + bucket = full.into_bucket(); + break; + } + // Leaving this bucket in the last cluster for later. + full.into_bucket() + } + Empty(b) => { + // Encountered a hole between clusters. + b.into_bucket() + } + }; + bucket.next(); + } + bucket + } + /// Reads a bucket at a given index, returning an enum indicating whether /// it's initialized or not. You need to match on this enum to get /// the appropriate types to call most of the other functions in /// this module. pub fn peek(self) -> BucketState { - match unsafe { *self.raw.hash } { + match unsafe { *self.raw.hash() } { EMPTY_BUCKET => { Empty(EmptyBucket { raw: self.raw, - idx: self.idx, table: self.table, }) } _ => { Full(FullBucket { raw: self.raw, - idx: self.idx, table: self.table, }) } } } - /// Modifies the bucket pointer in place to make it point to the next slot. + /// Modifies the bucket in place to make it point to the next slot. pub fn next(&mut self) { - self.idx += 1; - let range = self.table.capacity(); - // This code is branchless thanks to a conditional move. - let dist = if self.idx & (range - 1) == 0 { - 1 - range as isize - } else { - 1 - }; - unsafe { - self.raw = self.raw.offset(dist); - } + self.raw.idx = self.raw.idx.wrapping_add(1) & self.table.capacity_mask; + } + + /// Modifies the bucket in place to make it point to the previous slot. + pub fn prev(&mut self) { + self.raw.idx = self.raw.idx.wrapping_sub(1) & self.table.capacity_mask; } } @@ -350,26 +442,24 @@ impl>> EmptyBucket { pub fn into_bucket(self) -> Bucket { Bucket { raw: self.raw, - idx: self.idx, table: self.table, } } - pub fn gap_peek(self) -> Option> { + pub fn gap_peek(self) -> Result, Bucket> { let gap = EmptyBucket { raw: self.raw, - idx: self.idx, table: (), }; match self.next().peek() { Full(bucket) => { - Some(GapThenFull { + Ok(GapThenFull { gap: gap, full: bucket, }) } - Empty(..) => None, + Empty(e) => Err(e.into_bucket()), } } } @@ -386,15 +476,29 @@ impl EmptyBucket /// Use `make_hash` to construct a `SafeHash` to pass to this function. pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket { unsafe { - *self.raw.hash = hash.inspect(); - ptr::write(self.raw.pair as *mut (K, V), (key, value)); + *self.raw.hash() = hash.inspect(); + ptr::write(self.raw.pair(), (key, value)); self.table.borrow_table_mut().size += 1; } FullBucket { raw: self.raw, - idx: self.idx, + table: self.table, + } + } + + /// Puts given key, remain value uninitialized. + /// It is only used for inplacement insertion. + pub unsafe fn put_key(mut self, hash: SafeHash, key: K) -> FullBucket { + *self.raw.hash() = hash.inspect(); + let pair_ptr = self.raw.pair(); + ptr::write(&mut (*pair_ptr).0, key); + + self.table.borrow_table_mut().size += 1; + + FullBucket { + raw: self.raw, table: self.table, } } @@ -412,7 +516,6 @@ impl>> FullBucket { pub fn into_bucket(self) -> Bucket { Bucket { raw: self.raw, - idx: self.idx, table: self.table, } } @@ -422,7 +525,6 @@ impl>> FullBucket { pub fn stash(self) -> FullBucket { FullBucket { raw: self.raw, - idx: self.idx, table: self, } } @@ -436,17 +538,20 @@ impl>> FullBucket { // Calculates the distance one has to travel when going from // `hash mod capacity` onwards to `idx mod capacity`, wrapping around // if the destination is not reached before the end of the table. - (self.idx.wrapping_sub(self.hash().inspect() as usize)) & (self.table.capacity() - 1) + (self.raw.idx.wrapping_sub(self.hash().inspect() as usize)) & self.table.capacity_mask } #[inline] pub fn hash(&self) -> SafeHash { - unsafe { SafeHash { hash: *self.raw.hash } } + unsafe { SafeHash { hash: *self.raw.hash() } } } /// Gets references to the key and value at a given index. pub fn read(&self) -> (&K, &V) { - unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) } + unsafe { + let pair_ptr = self.raw.pair(); + (&(*pair_ptr).0, &(*pair_ptr).1) + } } } @@ -462,17 +567,27 @@ impl<'t, K, V> FullBucket> { self.table.size -= 1; unsafe { - *self.raw.hash = EMPTY_BUCKET; - let (k, v) = ptr::read(self.raw.pair); + *self.raw.hash() = EMPTY_BUCKET; + let (k, v) = ptr::read(self.raw.pair()); (EmptyBucket { raw: self.raw, - idx: self.idx, table: self.table, }, k, v) } } + + /// Remove this bucket's `key` from the hashtable. + /// Only used for inplacement insertion. + /// NOTE: `Value` is uninitialized when this function is called, don't try to drop the `Value`. + pub unsafe fn remove_key(&mut self) { + self.table.size -= 1; + + *self.raw.hash() = EMPTY_BUCKET; + let pair_ptr = self.raw.pair(); + ptr::drop_in_place(&mut (*pair_ptr).0); // only drop key + } } // This use of `Put` is misleading and restrictive, but safe and sufficient for our use cases @@ -482,8 +597,8 @@ impl FullBucket { pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { unsafe { - let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); - let (old_key, old_val) = ptr::replace(self.raw.pair as *mut (K, V), (k, v)); + let old_hash = ptr::replace(self.raw.hash() as *mut SafeHash, h); + let (old_key, old_val) = ptr::replace(self.raw.pair(), (k, v)); (old_hash, old_key, old_val) } @@ -495,8 +610,10 @@ impl FullBucket { /// Gets mutable references to the key and value at a given index. pub fn read_mut(&mut self) -> (&mut K, &mut V) { - let pair_mut = self.raw.pair as *mut (K, V); - unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) } + unsafe { + let pair_ptr = self.raw.pair(); + (&mut (*pair_ptr).0, &mut (*pair_ptr).1) + } } } @@ -509,7 +626,10 @@ impl<'t, K, V, M> FullBucket /// in exchange for this, the returned references have a longer lifetime /// than the references returned by `read()`. pub fn into_refs(self) -> (&'t K, &'t V) { - unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) } + unsafe { + let pair_ptr = self.raw.pair(); + (&(*pair_ptr).0, &(*pair_ptr).1) + } } } @@ -519,8 +639,10 @@ impl<'t, K, V, M> FullBucket /// This works similarly to `into_refs`, exchanging a bucket state /// for mutable references into the table. pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { - let pair_mut = self.raw.pair as *mut (K, V); - unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) } + unsafe { + let pair_ptr = self.raw.pair(); + (&mut (*pair_ptr).0, &mut (*pair_ptr).1) + } } } @@ -532,24 +654,29 @@ impl GapThenFull &self.full } - pub fn shift(mut self) -> Option> { + pub fn into_table(self) -> M { + self.full.into_table() + } + + pub fn shift(mut self) -> Result, Bucket> { unsafe { - *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); - ptr::copy_nonoverlapping(self.full.raw.pair, self.gap.raw.pair as *mut (K, V), 1); + let (gap_hash, gap_pair) = self.gap.raw.hash_pair(); + let (full_hash, full_pair) = self.full.raw.hash_pair(); + *gap_hash = mem::replace(&mut *full_hash, EMPTY_BUCKET); + ptr::copy_nonoverlapping(full_pair, gap_pair, 1); } - let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full; + let FullBucket { raw: prev_raw, .. } = self.full; match self.full.next().peek() { Full(bucket) => { self.gap.raw = prev_raw; - self.gap.idx = prev_idx; self.full = bucket; - Some(self) + Ok(self) } - Empty(..) => None, + Empty(b) => Err(b.into_bucket()), } } } @@ -622,8 +749,8 @@ impl RawTable { if capacity == 0 { return RawTable { size: 0, - capacity: 0, - hashes: Unique::empty(), + capacity_mask: capacity.wrapping_sub(1), + hashes: TaggedHashUintPtr::new(EMPTY as *mut HashUint), marker: marker::PhantomData, }; } @@ -654,31 +781,33 @@ impl RawTable { .expect("capacity overflow"), "capacity overflow"); - let layout = Layout::from_size_align(size, alignment).expect("invalid alloc layout"); - let buffer = Heap.alloc(layout).unwrap_or_else(|err| Heap.oom(err)); + let buffer = Heap.alloc(Layout::from_size_align(size, alignment).unwrap()) + .unwrap_or_else(|e| Heap.oom(e)); let hashes = buffer.offset(hash_offset as isize) as *mut HashUint; RawTable { - capacity: capacity, + capacity_mask: capacity.wrapping_sub(1), size: 0, - hashes: Unique::new(hashes), + hashes: TaggedHashUintPtr::new(hashes), marker: marker::PhantomData, } } - fn first_bucket_raw(&self) -> RawBucket { - let hashes_size = self.capacity * size_of::(); - let pairs_size = self.capacity * size_of::<(K, V)>(); + fn raw_bucket_at(&self, index: usize) -> RawBucket { + let hashes_size = self.capacity() * size_of::(); + let pairs_size = self.capacity() * size_of::<(K, V)>(); - let buffer = self.hashes.as_ptr(); let (pairs_offset, _, oflo) = calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>()); debug_assert!(!oflo, "capacity overflow"); + + let buffer = self.hashes.ptr() as *mut u8; unsafe { RawBucket { - hash: self.hashes.as_ptr(), - pair: buffer.offset(pairs_offset as isize) as *const _, + hash_start: buffer as *mut HashUint, + pair_start: buffer.offset(pairs_offset as isize) as *const (K, V), + idx: index, _marker: marker::PhantomData, } } @@ -689,14 +818,14 @@ impl RawTable { pub fn new(capacity: usize) -> RawTable { unsafe { let ret = RawTable::new_uninitialized(capacity); - ptr::write_bytes(ret.hashes.as_ptr(), 0, capacity); + ptr::write_bytes(ret.hashes.ptr(), 0, capacity); ret } } /// The hashtable's capacity, similar to a vector's. pub fn capacity(&self) -> usize { - self.capacity + self.capacity_mask.wrapping_add(1) } /// The number of elements ever `put` in the hashtable, minus the number @@ -707,8 +836,8 @@ impl RawTable { fn raw_buckets(&self) -> RawBuckets { RawBuckets { - raw: self.first_bucket_raw(), - hashes_end: unsafe { self.hashes.as_ptr().offset(self.capacity as isize) }, + raw: self.raw_bucket_at(0), + elems_left: self.size, marker: marker::PhantomData, } } @@ -716,25 +845,23 @@ impl RawTable { pub fn iter(&self) -> Iter { Iter { iter: self.raw_buckets(), - elems_left: self.size(), } } pub fn iter_mut(&mut self) -> IterMut { IterMut { iter: self.raw_buckets(), - elems_left: self.size(), _marker: marker::PhantomData, } } pub fn into_iter(self) -> IntoIter { - let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); + let RawBuckets { raw, elems_left, .. } = self.raw_buckets(); // Replace the marker regardless of lifetime bounds on parameters. IntoIter { iter: RawBuckets { raw: raw, - hashes_end: hashes_end, + elems_left: elems_left, marker: marker::PhantomData, }, table: self, @@ -742,12 +869,12 @@ impl RawTable { } pub fn drain(&mut self) -> Drain { - let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); + let RawBuckets { raw, elems_left, .. } = self.raw_buckets(); // Replace the marker regardless of lifetime bounds on parameters. Drain { iter: RawBuckets { raw: raw, - hashes_end: hashes_end, + elems_left: elems_left, marker: marker::PhantomData, }, table: unsafe { Shared::new(self) }, @@ -755,24 +882,40 @@ impl RawTable { } } - /// Returns an iterator that copies out each entry. Used while the table - /// is being dropped. - unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets { - let raw_bucket = self.first_bucket_raw(); - RevMoveBuckets { - raw: raw_bucket.offset(self.capacity as isize), - hashes_end: raw_bucket.hash, - elems_left: self.size, - marker: marker::PhantomData, + /// Drops buckets in reverse order. It leaves the table in an inconsistent + /// state and should only be used for dropping the table's remaining + /// entries. It's used in the implementation of Drop. + unsafe fn rev_drop_buckets(&mut self) { + // initialize the raw bucket past the end of the table + let mut raw = self.raw_bucket_at(self.capacity()); + let mut elems_left = self.size; + + while elems_left != 0 { + raw.idx -= 1; + + if *raw.hash() != EMPTY_BUCKET { + elems_left -= 1; + ptr::drop_in_place(raw.pair()); + } } } + + /// Set the table tag + pub fn set_tag(&mut self, value: bool) { + self.hashes.set_tag(value) + } + + /// Get the table tag + pub fn tag(&self) -> bool { + self.hashes.tag() + } } /// A raw iterator. The basis for some other iterators in this module. Although /// this interface is safe, it's not used outside this module. struct RawBuckets<'a, K, V> { raw: RawBucket, - hashes_end: *mut HashUint, + elems_left: usize, // Strictly speaking, this should be &'a (K,V), but that would // require that K:'a, and we often use RawBuckets<'static...> for @@ -787,7 +930,7 @@ impl<'a, K, V> Clone for RawBuckets<'a, K, V> { fn clone(&self) -> RawBuckets<'a, K, V> { RawBuckets { raw: self.raw, - hashes_end: self.hashes_end, + elems_left: self.elems_left, marker: marker::PhantomData, } } @@ -798,62 +941,36 @@ impl<'a, K, V> Iterator for RawBuckets<'a, K, V> { type Item = RawBucket; fn next(&mut self) -> Option> { - while self.raw.hash != self.hashes_end { - unsafe { - // We are swapping out the pointer to a bucket and replacing - // it with the pointer to the next one. - let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); - if *prev.hash != EMPTY_BUCKET { - return Some(prev); - } - } - } - - None - } -} - -/// An iterator that moves out buckets in reverse order. It leaves the table -/// in an inconsistent state and should only be used for dropping -/// the table's remaining entries. It's used in the implementation of Drop. -struct RevMoveBuckets<'a, K, V> { - raw: RawBucket, - hashes_end: *mut HashUint, - elems_left: usize, - - // As above, `&'a (K,V)` would seem better, but we often use - // 'static for the lifetime, and this is not a publicly exposed - // type. - marker: marker::PhantomData<&'a ()>, -} - -impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> { - type Item = (K, V); - - fn next(&mut self) -> Option<(K, V)> { if self.elems_left == 0 { return None; } loop { - debug_assert!(self.raw.hash != self.hashes_end); - unsafe { - self.raw = self.raw.offset(-1); - - if *self.raw.hash != EMPTY_BUCKET { + let item = self.raw; + self.raw.idx += 1; + if *item.hash() != EMPTY_BUCKET { self.elems_left -= 1; - return Some(ptr::read(self.raw.pair)); + return Some(item); } } } } + + fn size_hint(&self) -> (usize, Option) { + (self.elems_left, Some(self.elems_left)) + } +} + +impl<'a, K, V> ExactSizeIterator for RawBuckets<'a, K, V> { + fn len(&self) -> usize { + self.elems_left + } } /// Iterator over shared references to entries in a table. pub struct Iter<'a, K: 'a, V: 'a> { iter: RawBuckets<'a, K, V>, - elems_left: usize, } unsafe impl<'a, K: Sync, V: Sync> Sync for Iter<'a, K, V> {} @@ -864,16 +981,13 @@ impl<'a, K, V> Clone for Iter<'a, K, V> { fn clone(&self) -> Iter<'a, K, V> { Iter { iter: self.iter.clone(), - elems_left: self.elems_left, } } } - /// Iterator over mutable references to entries in a table. pub struct IterMut<'a, K: 'a, V: 'a> { iter: RawBuckets<'a, K, V>, - elems_left: usize, // To ensure invariance with respect to V _marker: marker::PhantomData<&'a mut V>, } @@ -883,6 +997,14 @@ unsafe impl<'a, K: Sync, V: Sync> Sync for IterMut<'a, K, V> {} // but Send is the more useful bound unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {} +impl<'a, K: 'a, V: 'a> IterMut<'a, K, V> { + pub fn iter(&self) -> Iter { + Iter { + iter: self.iter.clone(), + } + } +} + /// Iterator over the entries in a table, consuming the table. pub struct IntoIter { table: RawTable, @@ -892,6 +1014,14 @@ pub struct IntoIter { unsafe impl Sync for IntoIter {} unsafe impl Send for IntoIter {} +impl IntoIter { + pub fn iter(&self) -> Iter { + Iter { + iter: self.iter.clone(), + } + } +} + /// Iterator over the entries in a table, clearing the table. pub struct Drain<'a, K: 'a, V: 'a> { table: Shared>, @@ -902,23 +1032,32 @@ pub struct Drain<'a, K: 'a, V: 'a> { unsafe impl<'a, K: Sync, V: Sync> Sync for Drain<'a, K, V> {} unsafe impl<'a, K: Send, V: Send> Send for Drain<'a, K, V> {} +impl<'a, K, V> Drain<'a, K, V> { + pub fn iter(&self) -> Iter { + Iter { + iter: self.iter.clone(), + } + } +} + impl<'a, K, V> Iterator for Iter<'a, K, V> { type Item = (&'a K, &'a V); fn next(&mut self) -> Option<(&'a K, &'a V)> { - self.iter.next().map(|bucket| { - self.elems_left -= 1; - unsafe { (&(*bucket.pair).0, &(*bucket.pair).1) } + self.iter.next().map(|raw| unsafe { + let pair_ptr = raw.pair(); + (&(*pair_ptr).0, &(*pair_ptr).1) }) } fn size_hint(&self) -> (usize, Option) { - (self.elems_left, Some(self.elems_left)) + self.iter.size_hint() } } + impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { fn len(&self) -> usize { - self.elems_left + self.iter.len() } } @@ -926,20 +1065,20 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { type Item = (&'a K, &'a mut V); fn next(&mut self) -> Option<(&'a K, &'a mut V)> { - self.iter.next().map(|bucket| { - self.elems_left -= 1; - let pair_mut = bucket.pair as *mut (K, V); - unsafe { (&(*pair_mut).0, &mut (*pair_mut).1) } + self.iter.next().map(|raw| unsafe { + let pair_ptr = raw.pair(); + (&(*pair_ptr).0, &mut (*pair_ptr).1) }) } fn size_hint(&self) -> (usize, Option) { - (self.elems_left, Some(self.elems_left)) + self.iter.size_hint() } } + impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { fn len(&self) -> usize { - self.elems_left + self.iter.len() } } @@ -947,23 +1086,23 @@ impl Iterator for IntoIter { type Item = (SafeHash, K, V); fn next(&mut self) -> Option<(SafeHash, K, V)> { - self.iter.next().map(|bucket| { + self.iter.next().map(|raw| { self.table.size -= 1; unsafe { - let (k, v) = ptr::read(bucket.pair); - (SafeHash { hash: *bucket.hash }, k, v) + let (k, v) = ptr::read(raw.pair()); + (SafeHash { hash: *raw.hash() }, k, v) } }) } fn size_hint(&self) -> (usize, Option) { - let size = self.table.size(); - (size, Some(size)) + self.iter.size_hint() } } + impl ExactSizeIterator for IntoIter { fn len(&self) -> usize { - self.table.size() + self.iter().len() } } @@ -972,23 +1111,23 @@ impl<'a, K, V> Iterator for Drain<'a, K, V> { #[inline] fn next(&mut self) -> Option<(SafeHash, K, V)> { - self.iter.next().map(|bucket| { + self.iter.next().map(|raw| { unsafe { - (*self.table.as_mut_ptr()).size -= 1; - let (k, v) = ptr::read(bucket.pair); - (SafeHash { hash: ptr::replace(bucket.hash, EMPTY_BUCKET) }, k, v) + self.table.as_mut().size -= 1; + let (k, v) = ptr::read(raw.pair()); + (SafeHash { hash: ptr::replace(&mut *raw.hash(), EMPTY_BUCKET) }, k, v) } }) } fn size_hint(&self) -> (usize, Option) { - let size = unsafe { (*self.table.as_mut_ptr()).size() }; - (size, Some(size)) + self.iter.size_hint() } } + impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { fn len(&self) -> usize { - unsafe { (*self.table.as_mut_ptr()).size() } + self.iter.len() } } @@ -1001,30 +1140,21 @@ impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> { impl Clone for RawTable { fn clone(&self) -> RawTable { unsafe { - let mut new_ht = RawTable::new_uninitialized(self.capacity()); - - { - let cap = self.capacity(); - let mut new_buckets = Bucket::first(&mut new_ht); - let mut buckets = Bucket::first(self); - while buckets.index() != cap { - match buckets.peek() { - Full(full) => { - let (h, k, v) = { - let (k, v) = full.read(); - (full.hash(), k.clone(), v.clone()) - }; - *new_buckets.raw.hash = h.inspect(); - ptr::write(new_buckets.raw.pair as *mut (K, V), (k, v)); - } - Empty(..) => { - *new_buckets.raw.hash = EMPTY_BUCKET; - } - } - new_buckets.next(); - buckets.next(); + let cap = self.capacity(); + let mut new_ht = RawTable::new_uninitialized(cap); + + let mut new_buckets = new_ht.raw_bucket_at(0); + let mut buckets = self.raw_bucket_at(0); + while buckets.idx < cap { + *new_buckets.hash() = *buckets.hash(); + if *new_buckets.hash() != EMPTY_BUCKET { + let pair_ptr = buckets.pair(); + let kv = ((*pair_ptr).0.clone(), (*pair_ptr).1.clone()); + ptr::write(new_buckets.pair(), kv); } - }; + buckets.idx += 1; + new_buckets.idx += 1; + } new_ht.size = self.size(); @@ -1033,10 +1163,9 @@ impl Clone for RawTable { } } -impl Drop for RawTable { - #[unsafe_destructor_blind_to_params] +unsafe impl<#[may_dangle] K, #[may_dangle] V> Drop for RawTable { fn drop(&mut self) { - if self.capacity == 0 { + if self.capacity() == 0 { return; } @@ -1048,12 +1177,12 @@ impl Drop for RawTable { unsafe { if needs_drop::<(K, V)>() { // avoid linear runtime for types that don't need drop - for _ in self.rev_move_buckets() {} + self.rev_drop_buckets(); } } - let hashes_size = self.capacity * size_of::(); - let pairs_size = self.capacity * size_of::<(K, V)>(); + let hashes_size = self.capacity() * size_of::(); + let pairs_size = self.capacity() * size_of::<(K, V)>(); let (align, _, size, oflo) = calculate_allocation(hashes_size, align_of::(), pairs_size, @@ -1062,8 +1191,8 @@ impl Drop for RawTable { debug_assert!(!oflo, "should be impossible"); unsafe { - let layout = Layout::from_size_align(size, align).expect("invalid alloc layout"); - Heap.dealloc(self.hashes.as_ptr() as *mut u8, layout); + Heap.dealloc(self.hashes.ptr() as *mut u8, + Layout::from_size_align(size, align).unwrap()); // Remember how everything was allocated out of one buffer // during initialization? We only need one call to free here. } diff --git a/ctr-std/src/collections/mod.rs b/ctr-std/src/collections/mod.rs index b9e92a0..b8a6a66 100644 --- a/ctr-std/src/collections/mod.rs +++ b/ctr-std/src/collections/mod.rs @@ -68,7 +68,7 @@ //! * You want to find the largest or smallest key that is smaller or larger //! than something. //! * You want to be able to get all of the entries in order on-demand. -//! * You want a sorted map. +//! * You want a map sorted by its keys. //! //! ### Use the `Set` variant of any of these `Map`s when: //! * You just want to remember which keys you've seen. @@ -157,29 +157,29 @@ //! information to do this itself. Therefore, it is up to us programmers to give //! it hints. //! -//! Any `with_capacity()` constructor will instruct the collection to allocate +//! Any `with_capacity` constructor will instruct the collection to allocate //! enough space for the specified number of elements. Ideally this will be for //! exactly that many elements, but some implementation details may prevent //! this. [`Vec`] and [`VecDeque`] can be relied on to allocate exactly the -//! requested amount, though. Use `with_capacity()` when you know exactly how many +//! requested amount, though. Use `with_capacity` when you know exactly how many //! elements will be inserted, or at least have a reasonable upper-bound on that //! number. //! -//! When anticipating a large influx of elements, the `reserve()` family of +//! When anticipating a large influx of elements, the `reserve` family of //! methods can be used to hint to the collection how much room it should make -//! for the coming items. As with `with_capacity()`, the precise behavior of +//! for the coming items. As with `with_capacity`, the precise behavior of //! these methods will be specific to the collection of interest. //! //! For optimal performance, collections will generally avoid shrinking //! themselves. If you believe that a collection will not soon contain any more -//! elements, or just really need the memory, the `shrink_to_fit()` method prompts +//! elements, or just really need the memory, the `shrink_to_fit` method prompts //! the collection to shrink the backing array to the minimum size capable of //! holding its elements. //! //! Finally, if ever you're interested in what the actual capacity of the -//! collection is, most collections provide a `capacity()` method to query this +//! collection is, most collections provide a `capacity` method to query this //! information on demand. This can be useful for debugging purposes, or for -//! use with the `reserve()` methods. +//! use with the `reserve` methods. //! //! ## Iterators //! @@ -194,11 +194,11 @@ //! //! All of the standard collections provide several iterators for performing //! bulk manipulation of their contents. The three primary iterators almost -//! every collection should provide are `iter()`, `iter_mut()`, and `into_iter()`. +//! every collection should provide are `iter`, `iter_mut`, and `into_iter`. //! Some of these are not provided on collections where it would be unsound or //! unreasonable to provide them. //! -//! `iter()` provides an iterator of immutable references to all the contents of a +//! `iter` provides an iterator of immutable references to all the contents of a //! collection in the most "natural" order. For sequence collections like [`Vec`], //! this means the items will be yielded in increasing order of index starting //! at 0. For ordered collections like [`BTreeMap`], this means that the items @@ -214,8 +214,8 @@ //! } //! ``` //! -//! `iter_mut()` provides an iterator of *mutable* references in the same order as -//! `iter()`. This is great for mutating all the contents of the collection. +//! `iter_mut` provides an iterator of *mutable* references in the same order as +//! `iter`. This is great for mutating all the contents of the collection. //! //! ``` //! let mut vec = vec![1, 2, 3, 4]; @@ -224,12 +224,12 @@ //! } //! ``` //! -//! `into_iter()` transforms the actual collection into an iterator over its +//! `into_iter` transforms the actual collection into an iterator over its //! contents by-value. This is great when the collection itself is no longer -//! needed, and the values are needed elsewhere. Using `extend()` with `into_iter()` +//! needed, and the values are needed elsewhere. Using `extend` with `into_iter` //! is the main way that contents of one collection are moved into another. -//! `extend()` automatically calls `into_iter()`, and takes any `T: `[`IntoIterator`]. -//! Calling `collect()` on an iterator itself is also a great way to convert one +//! `extend` automatically calls `into_iter`, and takes any `T: `[`IntoIterator`]. +//! Calling `collect` on an iterator itself is also a great way to convert one //! collection into another. Both of these methods should internally use the //! capacity management tools discussed in the previous section to do this as //! efficiently as possible. @@ -248,9 +248,9 @@ //! ``` //! //! Iterators also provide a series of *adapter* methods for performing common -//! threads to sequences. Among the adapters are functional favorites like `map()`, -//! `fold()`, `skip()` and `take()`. Of particular interest to collections is the -//! `rev()` adapter, that reverses any iterator that supports this operation. Most +//! threads to sequences. Among the adapters are functional favorites like `map`, +//! `fold`, `skip` and `take`. Of particular interest to collections is the +//! `rev` adapter, that reverses any iterator that supports this operation. Most //! collections provide reversible iterators as the way to iterate over them in //! reverse order. //! @@ -263,27 +263,27 @@ //! //! Several other collection methods also return iterators to yield a sequence //! of results but avoid allocating an entire collection to store the result in. -//! This provides maximum flexibility as `collect()` or `extend()` can be called to +//! This provides maximum flexibility as `collect` or `extend` can be called to //! "pipe" the sequence into any collection if desired. Otherwise, the sequence //! can be looped over with a `for` loop. The iterator can also be discarded //! after partial use, preventing the computation of the unused items. //! //! ## Entries //! -//! The `entry()` API is intended to provide an efficient mechanism for +//! The `entry` API is intended to provide an efficient mechanism for //! manipulating the contents of a map conditionally on the presence of a key or //! not. The primary motivating use case for this is to provide efficient //! accumulator maps. For instance, if one wishes to maintain a count of the //! number of times each key has been seen, they will have to perform some //! conditional logic on whether this is the first time the key has been seen or -//! not. Normally, this would require a `find()` followed by an `insert()`, +//! not. Normally, this would require a `find` followed by an `insert`, //! effectively duplicating the search effort on each insertion. //! //! When a user calls `map.entry(&key)`, the map will search for the key and //! then yield a variant of the `Entry` enum. //! //! If a `Vacant(entry)` is yielded, then the key *was not* found. In this case -//! the only valid operation is to `insert()` a value into the entry. When this is +//! the only valid operation is to `insert` a value into the entry. When this is //! done, the vacant entry is consumed and converted into a mutable reference to //! the value that was inserted. This allows for further manipulation of the //! value beyond the lifetime of the search itself. This is useful if complex @@ -291,14 +291,14 @@ //! just inserted. //! //! If an `Occupied(entry)` is yielded, then the key *was* found. In this case, -//! the user has several options: they can `get()`, `insert()` or `remove()` the +//! the user has several options: they can `get`, `insert` or `remove` the //! value of the occupied entry. Additionally, they can convert the occupied //! entry into a mutable reference to its value, providing symmetry to the -//! vacant `insert()` case. +//! vacant `insert` case. //! //! ### Examples //! -//! Here are the two primary ways in which `entry()` is used. First, a simple +//! Here are the two primary ways in which `entry` is used. First, a simple //! example where the logic performed on the values is trivial. //! //! #### Counting the number of times each character in a string occurs @@ -322,7 +322,7 @@ //! ``` //! //! When the logic to be performed on the value is more complex, we may simply -//! use the `entry()` API to ensure that the value is initialized and perform the +//! use the `entry` API to ensure that the value is initialized and perform the //! logic afterwards. //! //! #### Tracking the inebriation of customers at a bar @@ -360,7 +360,7 @@ //! //! # Insert and complex keys //! -//! If we have a more complex key, calls to `insert()` will +//! If we have a more complex key, calls to `insert` will //! not update the value of the key. For example: //! //! ``` @@ -420,15 +420,15 @@ #![stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::Bound; +pub use alloc::Bound; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::{BinaryHeap, BTreeMap, BTreeSet}; +pub use alloc::{BinaryHeap, BTreeMap, BTreeSet}; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::{LinkedList, VecDeque}; +pub use alloc::{LinkedList, VecDeque}; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::{binary_heap, btree_map, btree_set}; +pub use alloc::{binary_heap, btree_map, btree_set}; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::{linked_list, vec_deque}; +pub use alloc::{linked_list, vec_deque}; #[stable(feature = "rust1", since = "1.0.0")] pub use self::hash_map::HashMap; @@ -436,22 +436,20 @@ pub use self::hash_map::HashMap; pub use self::hash_set::HashSet; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::range; +pub use alloc::range; mod hash; #[stable(feature = "rust1", since = "1.0.0")] pub mod hash_map { - //! A hash map implementation which uses linear probing with Robin - //! Hood bucket stealing. + //! A hash map implemented with linear probing and Robin Hood bucket stealing. #[stable(feature = "rust1", since = "1.0.0")] pub use super::hash::map::*; } #[stable(feature = "rust1", since = "1.0.0")] pub mod hash_set { - //! An implementation of a hash set using the underlying representation of a - //! HashMap where the value is (). + //! A hash set implemented as a `HashMap` where the value is `()`. #[stable(feature = "rust1", since = "1.0.0")] pub use super::hash::set::*; } diff --git a/ctr-std/src/lib.rs b/ctr-std/src/lib.rs index cc4825c..8edf3c7 100644 --- a/ctr-std/src/lib.rs +++ b/ctr-std/src/lib.rs @@ -25,9 +25,11 @@ #![feature(integer_atomics)] #![feature(lang_items)] #![feature(macro_reexport)] +#![feature(needs_drop)] #![feature(oom)] #![feature(on_unimplemented)] #![feature(optin_builtin_traits)] +#![feature(placement_new_protocol)] #![feature(prelude_import)] #![feature(raw)] #![feature(rand)] @@ -54,15 +56,14 @@ #[allow(unused)] use prelude::v1::*; -#[macro_reexport(assert, assert_eq, debug_assert, debug_assert_eq, - unreachable, unimplemented, write, writeln, try)] +#[macro_reexport(assert, assert_eq, assert_ne, debug_assert, debug_assert_eq, + debug_assert_ne, unreachable, unimplemented, write, writeln, try)] extern crate core as __core; +#[allow(deprecated)] extern crate rand as core_rand; + #[macro_use] #[macro_reexport(vec, format)] -extern crate collections as core_collections; - -#[allow(deprecated)] extern crate rand as core_rand; extern crate alloc; extern crate std_unicode; extern crate alloc_system; @@ -139,17 +140,17 @@ pub use alloc::boxed; #[stable(feature = "rust1", since = "1.0.0")] pub use alloc::rc; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::borrow; +pub use alloc::borrow; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::fmt; +pub use alloc::fmt; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::slice; +pub use alloc::slice; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::str; +pub use alloc::str; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::string; +pub use alloc::string; #[stable(feature = "rust1", since = "1.0.0")] -pub use core_collections::vec; +pub use alloc::vec; #[stable(feature = "rust1", since = "1.0.0")] pub use std_unicode::char; diff --git a/ctr-std/src/sys/unix/alloc.rs b/ctr-std/src/sys/unix/alloc.rs deleted file mode 100644 index d7eb2f2..0000000 --- a/ctr-std/src/sys/unix/alloc.rs +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use alloc::allocator::{Alloc,Layout,AllocErr,Excess,CannotReallocInPlace}; -use alloc::heap; - -/// Heap allocator that delegates to the default liballoc heap allocator. -/// Its purpose is to override methods while still using the standard alloc api. -#[derive(Copy, Clone, Default, Debug)] -pub struct Heap; - -unsafe impl Alloc for Heap { - #[inline] - unsafe fn alloc(&mut self, layout: Layout) -> Result<*mut u8, AllocErr> { - heap::Heap.alloc(layout) - } - - // A nicer handler for out-of-memory situations than the default one. This - // one prints a message to stderr before aborting. It is critical that this - // code does not allocate any memory since we are in an OOM situation. Any - // errors are ignored while printing since there's nothing we can do about - // them and we are about to exit anyways. - #[inline] - fn oom(&mut self, err: AllocErr) -> ! { - use intrinsics; - use libc::{self, STDERR_FILENO, c_void}; - - let msg = err.description(); - unsafe { - libc::write(STDERR_FILENO, msg.as_ptr() as *const c_void, msg.len()); - libc::write(STDERR_FILENO, "\n".as_ptr() as *const c_void, 1); - intrinsics::abort(); - } - } - - #[inline] - unsafe fn dealloc(&mut self, ptr: *mut u8, layout: Layout) { - heap::Heap.dealloc(ptr, layout) - } - - #[inline] - fn usable_size(&self, layout: &Layout) -> (usize, usize) { - heap::Heap.usable_size(layout) - } - - #[inline] - unsafe fn realloc(&mut self, - ptr: *mut u8, - layout: Layout, - new_layout: Layout) - -> Result<*mut u8, AllocErr> - { - heap::Heap.realloc(ptr, layout, new_layout) - } - - #[inline] - unsafe fn alloc_zeroed(&mut self, layout: Layout) -> Result<*mut u8, AllocErr> { - heap::Heap.alloc_zeroed(layout) - } - - #[inline] - unsafe fn alloc_excess(&mut self, layout: Layout) -> Result { - heap::Heap.alloc_excess(layout) - } - - #[inline] - unsafe fn realloc_excess(&mut self, - ptr: *mut u8, - layout: Layout, - new_layout: Layout) -> Result - { - heap::Heap.realloc_excess(ptr, layout, new_layout) - } - - #[inline] - unsafe fn grow_in_place(&mut self, - ptr: *mut u8, - layout: Layout, - new_layout: Layout) - -> Result<(), CannotReallocInPlace> - { - heap::Heap.grow_in_place(ptr, layout, new_layout) - } - - #[inline] - unsafe fn shrink_in_place(&mut self, - ptr: *mut u8, - layout: Layout, - new_layout: Layout) -> Result<(), CannotReallocInPlace> - { - heap::Heap.shrink_in_place(ptr, layout, new_layout) - } -} diff --git a/ctr-std/src/sys/unix/mod.rs b/ctr-std/src/sys/unix/mod.rs index 9d57045..cd583b5 100644 --- a/ctr-std/src/sys/unix/mod.rs +++ b/ctr-std/src/sys/unix/mod.rs @@ -29,7 +29,6 @@ pub mod thread; pub mod rand; pub mod thread_local; pub mod time; -pub mod alloc; #[cfg(not(test))] pub fn init() { diff --git a/ctr-std/src/sys_common/mod.rs b/ctr-std/src/sys_common/mod.rs index 936ff80..c6e81cc 100644 --- a/ctr-std/src/sys_common/mod.rs +++ b/ctr-std/src/sys_common/mod.rs @@ -76,10 +76,6 @@ pub fn at_exit(f: F) -> Result<(), ()> { if at_exit_imp::push(Box::new(f)) {Ok(())} else {Err(())} } -macro_rules! rtabort { - ($($t:tt)*) => (::sys_common::util::abort(format_args!($($t)*))) -} - // Computes (value*numer)/denom without overflow, as long as both // (numer*denom) and the overall result fit into i64 (which is the case // for our time conversions). From e0221d9f29e836321dffe27f7e1138303afbd950 Mon Sep 17 00:00:00 2001 From: panicbit Date: Mon, 10 Jul 2017 17:31:44 +0200 Subject: [PATCH 7/9] Remove ctr-libc dep from ctr-std --- ctr-std/Xargo.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ctr-std/Xargo.toml b/ctr-std/Xargo.toml index e1901fa..c078bcc 100644 --- a/ctr-std/Xargo.toml +++ b/ctr-std/Xargo.toml @@ -3,7 +3,3 @@ [dependencies.rand] [dependencies.alloc_system] - -[dependencies.ctr-libc] -path = "../ctr-libc" -stage = 1 From 7b7d98d3d328fee33b0d0ab01637a0065b47dcee Mon Sep 17 00:00:00 2001 From: Fenrir Date: Tue, 18 Jul 2017 23:07:35 -0600 Subject: [PATCH 8/9] Use alloc_system3ds again --- ctr-std/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ctr-std/Cargo.toml b/ctr-std/Cargo.toml index 97f1c6a..ebb6124 100644 --- a/ctr-std/Cargo.toml +++ b/ctr-std/Cargo.toml @@ -6,6 +6,10 @@ license = "MIT/Apache 2.0" [dependencies.compiler_builtins] git = "https://github.com/rust-lang-nursery/compiler-builtins" +[dependencies.alloc_system] +git = "https://github.com/FenrirWolf/alloc_system3ds/" +branch = "new-allocator" + [dependencies.libc] version = "0.2" default-features = false From 54d7fc6c88d8e884ed95bdefc3493b09e4e43573 Mon Sep 17 00:00:00 2001 From: panicbit Date: Thu, 20 Jul 2017 21:10:09 +0200 Subject: [PATCH 9/9] Use rust3ds/alloc_system3ds again --- ctr-std/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ctr-std/Cargo.toml b/ctr-std/Cargo.toml index ebb6124..1a883f5 100644 --- a/ctr-std/Cargo.toml +++ b/ctr-std/Cargo.toml @@ -7,8 +7,7 @@ license = "MIT/Apache 2.0" git = "https://github.com/rust-lang-nursery/compiler-builtins" [dependencies.alloc_system] -git = "https://github.com/FenrirWolf/alloc_system3ds/" -branch = "new-allocator" +git = "https://github.com/rust3ds/alloc_system3ds" [dependencies.libc] version = "0.2"