diff --git a/ctr-std/Xargo.toml b/ctr-std/Xargo.toml index dc3fd74..7d069ba 100644 --- a/ctr-std/Xargo.toml +++ b/ctr-std/Xargo.toml @@ -1,5 +1,7 @@ [dependencies.collections] +[dependencies.rand] + [dependencies.ctr-libc] path = "../ctr-libc" stage = 1 diff --git a/ctr-std/src/collections/hash/bench.rs b/ctr-std/src/collections/hash/bench.rs new file mode 100644 index 0000000..ff6cb79 --- /dev/null +++ b/ctr-std/src/collections/hash/bench.rs @@ -0,0 +1,128 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![cfg(test)] + +extern crate test; + +use self::test::Bencher; + +#[bench] +fn new_drop(b: &mut Bencher) { + use super::map::HashMap; + + b.iter(|| { + let m: HashMap = HashMap::new(); + assert_eq!(m.len(), 0); + }) +} + +#[bench] +fn new_insert_drop(b: &mut Bencher) { + use super::map::HashMap; + + b.iter(|| { + let mut m = HashMap::new(); + m.insert(0, 0); + assert_eq!(m.len(), 1); + }) +} + +#[bench] +fn grow_by_insertion(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + let mut k = 1001; + + b.iter(|| { + m.insert(k, k); + k += 1; + }); +} + +#[bench] +fn find_existing(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + b.iter(|| { + for i in 1..1001 { + m.contains_key(&i); + } + }); +} + +#[bench] +fn find_nonexisting(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + b.iter(|| { + for i in 1001..2001 { + m.contains_key(&i); + } + }); +} + +#[bench] +fn hashmap_as_queue(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + let mut k = 1; + + b.iter(|| { + m.remove(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }); +} + +#[bench] +fn get_remove_insert(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + let mut k = 1; + + b.iter(|| { + m.get(&(k + 400)); + m.get(&(k + 2000)); + m.remove(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }) +} diff --git a/ctr-std/src/collections/hash/map.rs b/ctr-std/src/collections/hash/map.rs new file mode 100644 index 0000000..0b310eb --- /dev/null +++ b/ctr-std/src/collections/hash/map.rs @@ -0,0 +1,3073 @@ +// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
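The benchmarks above require the unstable `test::Bencher` API. As a rough, standalone stand-in that runs on stable Rust, the `grow_by_insertion` measurement can be sketched with `std::time::Instant`; the iteration count and types here are chosen arbitrarily for illustration and are not part of the patch:

```
use std::collections::HashMap;
use std::time::Instant;

fn main() {
    // Pre-populate with 1000 entries, mirroring the benchmark setup above.
    let mut m: HashMap<i32, i32> = HashMap::new();
    for i in 1..1001 {
        m.insert(i, i);
    }

    // Time a batch of further insertions (a crude stand-in for Bencher::iter).
    let start = Instant::now();
    let mut k = 1001;
    for _ in 0..100_000 {
        m.insert(k, k);
        k += 1;
    }
    println!("100k inserts took {:?}", start.elapsed());
}
```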
+ +use self::Entry::*; +use self::VacantEntryState::*; + +use cell::Cell; +use borrow::Borrow; +use cmp::max; +use fmt::{self, Debug}; +#[allow(deprecated)] +use hash::{Hash, Hasher, BuildHasher, SipHasher13}; +use iter::{FromIterator, FusedIterator}; +use mem::{self, replace}; +use ops::{Deref, Index}; +use rand::{self, Rng}; + +use super::table::{self, Bucket, EmptyBucket, FullBucket, FullBucketMut, RawTable, SafeHash}; +use super::table::BucketState::{Empty, Full}; + +const MIN_NONZERO_RAW_CAPACITY: usize = 32; // must be a power of two + +/// The default behavior of HashMap implements a maximum load factor of 90.9%. +#[derive(Clone)] +struct DefaultResizePolicy; + +impl DefaultResizePolicy { + fn new() -> DefaultResizePolicy { + DefaultResizePolicy + } + + /// A hash map's "capacity" is the number of elements it can hold without + /// being resized. Its "raw capacity" is the number of slots required to + /// provide that capacity, accounting for maximum loading. The raw capacity + /// is always zero or a power of two. + #[inline] + fn raw_capacity(&self, len: usize) -> usize { + if len == 0 { + 0 + } else { + // 1. Account for loading: `raw_capacity >= len * 1.1`. + // 2. Ensure it is a power of two. + // 3. Ensure it is at least the minimum size. + let mut raw_cap = len * 11 / 10; + assert!(raw_cap >= len, "raw_cap overflow"); + raw_cap = raw_cap.checked_next_power_of_two().expect("raw_capacity overflow"); + raw_cap = max(MIN_NONZERO_RAW_CAPACITY, raw_cap); + raw_cap + } + } + + /// The capacity of the given raw capacity. + #[inline] + fn capacity(&self, raw_cap: usize) -> usize { + // This doesn't have to be checked for overflow since allocation size + // in bytes will overflow earlier than multiplication by 10. + // + // As per https://github.com/rust-lang/rust/pull/30991 this is updated + // to be: (raw_cap * den + den - 1) / num + (raw_cap * 10 + 10 - 1) / 11 + } +} + +// The main performance trick in this hashmap is called Robin Hood Hashing. +// It gains its excellent performance from one essential operation: +// +// If an insertion collides with an existing element, and that element's +// "probe distance" (how far away the element is from its ideal location) +// is higher than how far we've already probed, swap the elements. +// +// This massively lowers variance in probe distance, and allows us to get very +// high load factors with good performance. The 90% load factor I use is rather +// conservative. +// +// > Why a load factor of approximately 90%? +// +// In general, all the distances to initial buckets will converge on the mean. +// At a load factor of α, the odds of finding the target bucket after k +// probes is approximately 1-α^k. If we set this equal to 50% (since we converge +// on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round +// this down to make the math easier on the CPU and avoid its FPU. +// Since on average we start the probing in the middle of a cache line, this +// strategy pulls in two cache lines of hashes on every lookup. I think that's +// pretty good, but if you want to trade off some space, it could go down to one +// cache line on average with an α of 0.84. +// +// > Wait, what? Where did you get 1-α^k from? +// +// On the first probe, your odds of a collision with an existing element is α. +// The odds of doing this twice in a row is approximately α^2. For three times, +// α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT +// colliding after k tries is 1-α^k. 
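A minimal sketch, outside the patch, re-deriving the resize-policy arithmetic and the load-factor bound described above. The 11/10 and 10/11 factors and the minimum of 32 come from `DefaultResizePolicy` and `MIN_NONZERO_RAW_CAPACITY`; the α value is computed from the `1 - α^k` argument in the comment:

```
fn raw_capacity(len: usize) -> usize {
    // Same arithmetic as DefaultResizePolicy::raw_capacity above.
    if len == 0 {
        0
    } else {
        let raw = (len * 11 / 10).next_power_of_two();
        raw.max(32) // MIN_NONZERO_RAW_CAPACITY
    }
}

fn capacity(raw_cap: usize) -> usize {
    // Inverse direction: usable capacity of a given raw capacity.
    (raw_cap * 10 + 10 - 1) / 11
}

fn main() {
    for &len in &[1, 10, 29, 30, 100, 1000] {
        let raw = raw_capacity(len);
        // The chosen raw capacity always covers the requested capacity.
        assert!(capacity(raw) >= len);
        println!("len = {:4} -> raw_cap = {:4}, capacity = {:4}", len, raw, capacity(raw));
    }

    // Load-factor bound from the comment: 1 - α^k = 0.5 with k = 8 probes
    // gives α = 0.5^(1/8) ≈ 0.917, rounded down to ~0.909 (10/11).
    let alpha = 0.5f64.powf(1.0 / 8.0);
    println!("α ≈ {:.3}", alpha);
}
```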
+// +// The paper from 1986 cited below mentions an implementation which keeps track +// of the distance-to-initial-bucket histogram. This approach is not suitable +// for modern architectures because it requires maintaining an internal data +// structure. This allows very good first guesses, but we are most concerned +// with guessing entire cache lines, not individual indexes. Furthermore, array +// accesses are no longer linear and in one direction, as we have now. There +// is also memory and cache pressure that this would entail that would be very +// difficult to properly see in a microbenchmark. +// +// ## Future Improvements (FIXME!) +// +// Allow the load factor to be changed dynamically and/or at initialization. +// +// Also, would it be possible for us to reuse storage when growing the +// underlying table? This is exactly the use case for 'realloc', and may +// be worth exploring. +// +// ## Future Optimizations (FIXME!) +// +// Another possible design choice that I made without any real reason is +// parameterizing the raw table over keys and values. Technically, all we need +// is the size and alignment of keys and values, and the code should be just as +// efficient (well, we might need one for power-of-two size and one for not...). +// This has the potential to reduce code bloat in rust executables, without +// really losing anything except 4 words (key size, key alignment, val size, +// val alignment) which can be passed in to every call of a `RawTable` function. +// This would definitely be an avenue worth exploring if people start complaining +// about the size of rust executables. +// +// Annotate exceedingly likely branches in `table::make_hash` +// and `search_hashed` to reduce instruction cache pressure +// and mispredictions once it becomes possible (blocked on issue #11092). +// +// Shrinking the table could simply reallocate in place after moving buckets +// to the first half. +// +// The growth algorithm (fragment of the Proof of Correctness) +// -------------------- +// +// The growth algorithm is basically a fast path of the naive reinsertion- +// during-resize algorithm. Other paths should never be taken. +// +// Consider growing a robin hood hashtable of capacity n. Normally, we do this +// by allocating a new table of capacity `2n`, and then individually reinsert +// each element in the old table into the new one. This guarantees that the +// new table is a valid robin hood hashtable with all the desired statistical +// properties. Remark that the order we reinsert the elements in should not +// matter. For simplicity and efficiency, we will consider only linear +// reinsertions, which consist of reinserting all elements in the old table +// into the new one by increasing order of index. However we will not be +// starting our reinsertions from index 0 in general. If we start from index +// i, for the purpose of reinsertion we will consider all elements with real +// index j < i to have virtual index n + j. +// +// Our hash generation scheme consists of generating a 64-bit hash and +// truncating the most significant bits. When moving to the new table, we +// simply introduce a new bit to the front of the hash. Therefore, if an +// elements has ideal index i in the old table, it can have one of two ideal +// locations in the new table. If the new bit is 0, then the new ideal index +// is i. If the new bit is 1, then the new ideal index is n + i. 
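To make the "new bit" argument concrete, a small sketch (assuming the power-of-two masking scheme described above) showing that doubling the table maps an element's ideal index i to either i or n + i, depending on the one newly exposed hash bit:

```
fn main() {
    let n: u64 = 16; // old raw capacity (a power of two)
    for &hash in &[0xDEAD_BEEF_CAFE_F00Du64, 0x1234_5678_9ABC_DEF0, 42] {
        // Ideal index = hash truncated to the low bits.
        let old_ideal = hash & (n - 1);
        let new_ideal = hash & (2 * n - 1);

        // Doubling the table exposes exactly one more hash bit, so the new
        // ideal index is either i (bit = 0) or n + i (bit = 1), as above.
        let new_bit = (hash >> n.trailing_zeros()) & 1;
        assert_eq!(new_ideal, old_ideal + new_bit * n);
        println!("hash {:#018x}: old ideal {:2}, new ideal {:2}, new bit {}",
                 hash, old_ideal, new_ideal, new_bit);
    }
}
```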
Intuitively, +// we are producing two independent tables of size n, and for each element we +// independently choose which table to insert it into with equal probability. +// However the rather than wrapping around themselves on overflowing their +// indexes, the first table overflows into the first, and the first into the +// second. Visually, our new table will look something like: +// +// [yy_xxx_xxxx_xxx|xx_yyy_yyyy_yyy] +// +// Where x's are elements inserted into the first table, y's are elements +// inserted into the second, and _'s are empty sections. We now define a few +// key concepts that we will use later. Note that this is a very abstract +// perspective of the table. A real resized table would be at least half +// empty. +// +// Theorem: A linear robin hood reinsertion from the first ideal element +// produces identical results to a linear naive reinsertion from the same +// element. +// +// FIXME(Gankro, pczarn): review the proof and put it all in a separate README.md + +/// A hash map implementation which uses linear probing with Robin Hood bucket +/// stealing. +/// +/// By default, `HashMap` uses a hashing algorithm selected to provide +/// resistance against HashDoS attacks. The algorithm is randomly seeded, and a +/// reasonable best-effort is made to generate this seed from a high quality, +/// secure source of randomness provided by the host without blocking the +/// program. Because of this, the randomness of the seed is dependant on the +/// quality of the system's random number generator at the time it is created. +/// In particular, seeds generated when the system's entropy pool is abnormally +/// low such as during system boot may be of a lower quality. +/// +/// The default hashing algorithm is currently SipHash 1-3, though this is +/// subject to change at any point in the future. While its performance is very +/// competitive for medium sized keys, other hashing algorithms will outperform +/// it for small keys such as integers as well as large keys such as long +/// strings, though those algorithms will typically *not* protect against +/// attacks such as HashDoS. +/// +/// The hashing algorithm can be replaced on a per-`HashMap` basis using the +/// `HashMap::default`, `HashMap::with_hasher`, and +/// `HashMap::with_capacity_and_hasher` methods. Many alternative algorithms +/// are available on crates.io, such as the `fnv` crate. +/// +/// It is required that the keys implement the [`Eq`] and [`Hash`] traits, although +/// this can frequently be achieved by using `#[derive(PartialEq, Eq, Hash)]`. +/// If you implement these yourself, it is important that the following +/// property holds: +/// +/// ```text +/// k1 == k2 -> hash(k1) == hash(k2) +/// ``` +/// +/// In other words, if two keys are equal, their hashes must be equal. +/// +/// It is a logic error for a key to be modified in such a way that the key's +/// hash, as determined by the [`Hash`] trait, or its equality, as determined by +/// the [`Eq`] trait, changes while it is in the map. This is normally only +/// possible through [`Cell`], [`RefCell`], global state, I/O, or unsafe code. +/// +/// Relevant papers/articles: +/// +/// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf) +/// 2. Emmanuel Goossaert. ["Robin Hood +/// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/) +/// 3. Emmanuel Goossaert. 
["Robin Hood hashing: backward shift +/// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/) +/// +/// # Examples +/// +/// ``` +/// use std::collections::HashMap; +/// +/// // type inference lets us omit an explicit type signature (which +/// // would be `HashMap<&str, &str>` in this example). +/// let mut book_reviews = HashMap::new(); +/// +/// // review some books. +/// book_reviews.insert("Adventures of Huckleberry Finn", "My favorite book."); +/// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece."); +/// book_reviews.insert("Pride and Prejudice", "Very enjoyable."); +/// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot."); +/// +/// // check for a specific one. +/// if !book_reviews.contains_key("Les Misérables") { +/// println!("We've got {} reviews, but Les Misérables ain't one.", +/// book_reviews.len()); +/// } +/// +/// // oops, this review has a lot of spelling mistakes, let's delete it. +/// book_reviews.remove("The Adventures of Sherlock Holmes"); +/// +/// // look up the values associated with some keys. +/// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"]; +/// for book in &to_find { +/// match book_reviews.get(book) { +/// Some(review) => println!("{}: {}", book, review), +/// None => println!("{} is unreviewed.", book) +/// } +/// } +/// +/// // iterate over everything. +/// for (book, review) in &book_reviews { +/// println!("{}: \"{}\"", book, review); +/// } +/// ``` +/// +/// `HashMap` also implements an [`Entry API`](#method.entry), which allows +/// for more complex methods of getting, setting, updating and removing keys and +/// their values: +/// +/// ``` +/// use std::collections::HashMap; +/// +/// // type inference lets us omit an explicit type signature (which +/// // would be `HashMap<&str, u8>` in this example). +/// let mut player_stats = HashMap::new(); +/// +/// fn random_stat_buff() -> u8 { +/// // could actually return some random value here - let's just return +/// // some fixed value for now +/// 42 +/// } +/// +/// // insert a key only if it doesn't already exist +/// player_stats.entry("health").or_insert(100); +/// +/// // insert a key using a function that provides a new value only if it +/// // doesn't already exist +/// player_stats.entry("defence").or_insert_with(random_stat_buff); +/// +/// // update a key, guarding against the key possibly not being set +/// let stat = player_stats.entry("attack").or_insert(100); +/// *stat += random_stat_buff(); +/// ``` +/// +/// The easiest way to use `HashMap` with a custom type as key is to derive [`Eq`] and [`Hash`]. +/// We must also derive [`PartialEq`]. +/// +/// [`Eq`]: ../../std/cmp/trait.Eq.html +/// [`Hash`]: ../../std/hash/trait.Hash.html +/// [`PartialEq`]: ../../std/cmp/trait.PartialEq.html +/// [`RefCell`]: ../../std/cell/struct.RefCell.html +/// [`Cell`]: ../../std/cell/struct.Cell.html +/// +/// ``` +/// use std::collections::HashMap; +/// +/// #[derive(Hash, Eq, PartialEq, Debug)] +/// struct Viking { +/// name: String, +/// country: String, +/// } +/// +/// impl Viking { +/// /// Create a new Viking. +/// fn new(name: &str, country: &str) -> Viking { +/// Viking { name: name.to_string(), country: country.to_string() } +/// } +/// } +/// +/// // Use a HashMap to store the vikings' health points. 
+/// let mut vikings = HashMap::new(); +/// +/// vikings.insert(Viking::new("Einar", "Norway"), 25); +/// vikings.insert(Viking::new("Olaf", "Denmark"), 24); +/// vikings.insert(Viking::new("Harald", "Iceland"), 12); +/// +/// // Use derived implementation to print the status of the vikings. +/// for (viking, health) in &vikings { +/// println!("{:?} has {} hp", viking, health); +/// } +/// ``` +/// +/// A HashMap with fixed list of elements can be initialized from an array: +/// +/// ``` +/// use std::collections::HashMap; +/// +/// fn main() { +/// let timber_resources: HashMap<&str, i32> = +/// [("Norway", 100), +/// ("Denmark", 50), +/// ("Iceland", 10)] +/// .iter().cloned().collect(); +/// // use the values stored in map +/// } +/// ``` + +#[derive(Clone)] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct HashMap { + // All hashes are keyed on these values, to prevent hash collision attacks. + hash_builder: S, + + table: RawTable, + + resize_policy: DefaultResizePolicy, +} + +/// Search for a pre-hashed key. +#[inline] +fn search_hashed(table: M, hash: SafeHash, mut is_match: F) -> InternalEntry + where M: Deref>, + F: FnMut(&K) -> bool +{ + // This is the only function where capacity can be zero. To avoid + // undefined behavior when Bucket::new gets the raw bucket in this + // case, immediately return the appropriate search result. + if table.capacity() == 0 { + return InternalEntry::TableIsEmpty; + } + + let size = table.size(); + let mut probe = Bucket::new(table, hash); + let mut displacement = 0; + + loop { + let full = match probe.peek() { + Empty(bucket) => { + // Found a hole! + return InternalEntry::Vacant { + hash: hash, + elem: NoElem(bucket), + }; + } + Full(bucket) => bucket, + }; + + let probe_displacement = full.displacement(); + + if probe_displacement < displacement { + // Found a luckier bucket than me. + // We can finish the search early if we hit any bucket + // with a lower distance to initial bucket than we've probed. + return InternalEntry::Vacant { + hash: hash, + elem: NeqElem(full, probe_displacement), + }; + } + + // If the hash doesn't match, it can't be this one.. + if hash == full.hash() { + // If the key doesn't match, it can't be this one.. + if is_match(full.read().0) { + return InternalEntry::Occupied { elem: full }; + } + } + displacement += 1; + probe = full.next(); + debug_assert!(displacement <= size); + } +} + +fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { + let (empty, retkey, retval) = starting_bucket.take(); + let mut gap = match empty.gap_peek() { + Some(b) => b, + None => return (retkey, retval), + }; + + while gap.full().displacement() != 0 { + gap = match gap.shift() { + Some(b) => b, + None => break, + }; + } + + // Now we've done all our shifting. Return the value we grabbed earlier. + (retkey, retval) +} + +/// Perform robin hood bucket stealing at the given `bucket`. You must +/// also pass that bucket's displacement so we don't have to recalculate it. +/// +/// `hash`, `k`, and `v` are the elements to "robin hood" into the hashtable. +fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, + mut displacement: usize, + mut hash: SafeHash, + mut key: K, + mut val: V) + -> &'a mut V { + let starting_index = bucket.index(); + let size = bucket.table().size(); + // Save the *starting point*. 
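As an aside to the probing logic in `search_hashed`, here is a toy, self-contained model of the same early-exit rule, using a flat `Vec` of slots rather than the patch's `RawTable`/`Bucket` types (all names and the table layout are invented for illustration): the lookup stops as soon as it meets an occupant whose displacement is smaller than the distance already probed.

```
// A toy flat table: each slot is either empty or holds (hash, key).
#[derive(Clone, Copy)]
enum Slot {
    Empty,
    Full { hash: u64, key: u32 },
}

// Displacement of an occupied slot: how far it sits from its ideal index.
fn displacement(table_len: usize, ideal: usize, actual: usize) -> usize {
    (actual + table_len - ideal) % table_len
}

// The same early-exit rule as `search_hashed` above: give up as soon as we
// meet a slot whose occupant is closer to its ideal index than we have probed.
fn find(table: &[Slot], hash: u64, key: u32) -> Option<usize> {
    let n = table.len();
    let mut idx = (hash as usize) & (n - 1);
    let mut probed = 0;
    loop {
        match table[idx] {
            Slot::Empty => return None, // a hole: the key cannot be further on
            Slot::Full { hash: h, key: k } => {
                let ideal = (h as usize) & (n - 1);
                if displacement(n, ideal, idx) < probed {
                    // Everyone past here is "richer" than us; stop early.
                    return None;
                }
                if h == hash && k == key {
                    return Some(idx);
                }
            }
        }
        probed += 1;
        idx = (idx + 1) & (n - 1);
    }
}

fn main() {
    // Hand-built table of size 8 with a small cluster starting at index 2.
    let mut t = vec![Slot::Empty; 8];
    t[2] = Slot::Full { hash: 2, key: 20 }; // ideal 2, displacement 0
    t[3] = Slot::Full { hash: 2, key: 21 }; // ideal 2, displacement 1
    t[4] = Slot::Full { hash: 4, key: 40 }; // ideal 4, displacement 0
    assert_eq!(find(&t, 2, 21), Some(3));
    assert_eq!(find(&t, 2, 99), None); // stops at index 4 (displacement 0 < 2)
    println!("toy robin hood lookup: ok");
}
```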
+ let mut bucket = bucket.stash(); + // There can be at most `size - dib` buckets to displace, because + // in the worst case, there are `size` elements and we already are + // `displacement` buckets away from the initial one. + let idx_end = starting_index + size - bucket.displacement(); + + loop { + let (old_hash, old_key, old_val) = bucket.replace(hash, key, val); + hash = old_hash; + key = old_key; + val = old_val; + + loop { + displacement += 1; + let probe = bucket.next(); + debug_assert!(probe.index() != idx_end); + + let full_bucket = match probe.peek() { + Empty(bucket) => { + // Found a hole! + let bucket = bucket.put(hash, key, val); + // Now that it's stolen, just read the value's pointer + // right out of the table! Go back to the *starting point*. + // + // This use of `into_table` is misleading. It turns the + // bucket, which is a FullBucket on top of a + // FullBucketMut, into just one FullBucketMut. The "table" + // refers to the inner FullBucketMut in this context. + return bucket.into_table().into_mut_refs().1; + } + Full(bucket) => bucket, + }; + + let probe_displacement = full_bucket.displacement(); + + bucket = full_bucket; + + // Robin hood! Steal the spot. + if probe_displacement < displacement { + displacement = probe_displacement; + break; + } + } + } +} + +impl HashMap + where K: Eq + Hash, + S: BuildHasher +{ + fn make_hash(&self, x: &X) -> SafeHash + where X: Hash + { + table::make_hash(&self.hash_builder, x) + } + + /// Search for a key, yielding the index if it's found in the hashtable. + /// If you already have the hash for the key lying around, use + /// search_hashed. + #[inline] + fn search<'a, Q: ?Sized>(&'a self, q: &Q) -> InternalEntry> + where K: Borrow, + Q: Eq + Hash + { + let hash = self.make_hash(q); + search_hashed(&self.table, hash, |k| q.eq(k.borrow())) + } + + #[inline] + fn search_mut<'a, Q: ?Sized>(&'a mut self, q: &Q) -> InternalEntry> + where K: Borrow, + Q: Eq + Hash + { + let hash = self.make_hash(q); + search_hashed(&mut self.table, hash, |k| q.eq(k.borrow())) + } + + // The caller should ensure that invariants by Robin Hood Hashing hold + // and that there's space in the underlying table. + fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) { + let raw_cap = self.raw_capacity(); + let mut buckets = Bucket::new(&mut self.table, hash); + // note that buckets.index() keeps increasing + // even if the pointer wraps back to the first bucket. + let limit_bucket = buckets.index() + raw_cap; + + loop { + // We don't need to compare hashes for value swap. + // Not even DIBs for Robin Hood. + buckets = match buckets.peek() { + Empty(empty) => { + empty.put(hash, k, v); + return; + } + Full(b) => b.into_bucket(), + }; + buckets.next(); + debug_assert!(buckets.index() < limit_bucket); + } + } +} + +impl HashMap { + /// Creates an empty `HashMap`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// let mut map: HashMap<&str, isize> = HashMap::new(); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new() -> HashMap { + Default::default() + } + + /// Creates an empty `HashMap` with the specified capacity. + /// + /// The hash map will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash map will not allocate. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// let mut map: HashMap<&str, isize> = HashMap::with_capacity(10); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize) -> HashMap { + HashMap::with_capacity_and_hasher(capacity, Default::default()) + } +} + +impl HashMap + where K: Eq + Hash, + S: BuildHasher +{ + /// Creates an empty `HashMap` which will use the given hash builder to hash + /// keys. + /// + /// The created map has the default initial capacity. + /// + /// Warning: `hash_builder` is normally randomly generated, and + /// is designed to allow HashMaps to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// let mut map = HashMap::with_hasher(s); + /// map.insert(1, 2); + /// ``` + #[inline] + #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] + pub fn with_hasher(hash_builder: S) -> HashMap { + HashMap { + hash_builder: hash_builder, + resize_policy: DefaultResizePolicy::new(), + table: RawTable::new(0), + } + } + + /// Creates an empty `HashMap` with the specified capacity, using `hasher` + /// to hash the keys. + /// + /// The hash map will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash map will not allocate. + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow HashMaps to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// let mut map = HashMap::with_capacity_and_hasher(10, s); + /// map.insert(1, 2); + /// ``` + #[inline] + #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] + pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> HashMap { + let resize_policy = DefaultResizePolicy::new(); + let raw_cap = resize_policy.raw_capacity(capacity); + HashMap { + hash_builder: hash_builder, + resize_policy: resize_policy, + table: RawTable::new(raw_cap), + } + } + + /// Returns a reference to the map's hasher. + #[stable(feature = "hashmap_public_hasher", since = "1.9.0")] + pub fn hasher(&self) -> &S { + &self.hash_builder + } + + /// Returns the number of elements the map can hold without reallocating. + /// + /// This number is a lower bound; the `HashMap` might be able to hold + /// more, but is guaranteed to be able to hold at least this many. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// let map: HashMap = HashMap::with_capacity(100); + /// assert!(map.capacity() >= 100); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn capacity(&self) -> usize { + self.resize_policy.capacity(self.raw_capacity()) + } + + /// Returns the hash map's raw capacity. + #[inline] + fn raw_capacity(&self) -> usize { + self.table.capacity() + } + + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the `HashMap`. The collection may reserve more space to avoid + /// frequent reallocations. 
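The docs above note that the hasher can be swapped per map. A small sketch, assuming plain `std`, contrasting the default `RandomState` with a fixed-seed `BuildHasherDefault<DefaultHasher>` (which trades away the DoS resistance the warning refers to):

```
use std::collections::HashMap;
use std::collections::hash_map::{DefaultHasher, RandomState};
use std::hash::BuildHasherDefault;

fn main() {
    // The default: a randomly seeded RandomState (DoS-resistant).
    let mut a: HashMap<&str, u32> = HashMap::with_hasher(RandomState::new());
    a.insert("x", 1);

    // Swapping in a fixed-seed hasher: BuildHasherDefault builds hashers with
    // Default::default(), so two such maps hash identically across runs.
    let mut b: HashMap<&str, u32, BuildHasherDefault<DefaultHasher>> =
        HashMap::with_capacity_and_hasher(10, BuildHasherDefault::default());
    b.insert("x", 1);

    assert_eq!(a["x"], b["x"]);
}
```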
+ /// + /// # Panics + /// + /// Panics if the new allocation size overflows `usize`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// let mut map: HashMap<&str, isize> = HashMap::new(); + /// map.reserve(10); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve(&mut self, additional: usize) { + let remaining = self.capacity() - self.len(); // this can't overflow + if remaining < additional { + let min_cap = self.len().checked_add(additional).expect("reserve overflow"); + let raw_cap = self.resize_policy.raw_capacity(min_cap); + self.resize(raw_cap); + } + } + + /// Resizes the internal vectors to a new capacity. It's your + /// responsibility to: + /// 1) Ensure `new_raw_cap` is enough for all the elements, accounting + /// for the load factor. + /// 2) Ensure `new_raw_cap` is a power of two or zero. + fn resize(&mut self, new_raw_cap: usize) { + assert!(self.table.size() <= new_raw_cap); + assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0); + + let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap)); + let old_size = old_table.size(); + + if old_table.capacity() == 0 || old_table.size() == 0 { + return; + } + + // Grow the table. + // Specialization of the other branch. + let mut bucket = Bucket::first(&mut old_table); + + // "So a few of the first shall be last: for many be called, + // but few chosen." + // + // We'll most likely encounter a few buckets at the beginning that + // have their initial buckets near the end of the table. They were + // placed at the beginning as the probe wrapped around the table + // during insertion. We must skip forward to a bucket that won't + // get reinserted too early and won't unfairly steal others spot. + // This eliminates the need for robin hood. + loop { + bucket = match bucket.peek() { + Full(full) => { + if full.displacement() == 0 { + // This bucket occupies its ideal spot. + // It indicates the start of another "cluster". + bucket = full.into_bucket(); + break; + } + // Leaving this bucket in the last cluster for later. + full.into_bucket() + } + Empty(b) => { + // Encountered a hole between clusters. + b.into_bucket() + } + }; + bucket.next(); + } + + // This is how the buckets might be laid out in memory: + // ($ marks an initialized bucket) + // ________________ + // |$$$_$$$$$$_$$$$$| + // + // But we've skipped the entire initial cluster of buckets + // and will continue iteration in this order: + // ________________ + // |$$$$$$_$$$$$ + // ^ wrap around once end is reached + // ________________ + // $$$_____________| + // ^ exit once table.size == 0 + loop { + bucket = match bucket.peek() { + Full(bucket) => { + let h = bucket.hash(); + let (b, k, v) = bucket.take(); + self.insert_hashed_ordered(h, k, v); + if b.table().size() == 0 { + break; + } + b.into_bucket() + } + Empty(b) => b.into_bucket(), + }; + bucket.next(); + } + + assert_eq!(self.table.size(), old_size); + } + + /// Shrinks the capacity of the map as much as possible. It will drop + /// down as much as possible while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. 
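A short sketch that makes the `reserve`/resize behaviour observable from the outside by watching `capacity()` jump as elements are inserted; the exact capacities printed depend on the resize policy of the `std` build being run:

```
use std::collections::HashMap;

fn main() {
    let mut map: HashMap<u32, u32> = HashMap::new();
    let mut last_cap = map.capacity();
    println!("start: capacity = {}", last_cap);

    // Watch the capacity jump whenever the load factor would be exceeded.
    for i in 0..1000 {
        map.insert(i, i);
        if map.capacity() != last_cap {
            last_cap = map.capacity();
            println!("after {:4} inserts: capacity = {}", map.len(), last_cap);
        }
    }

    // reserve() only reallocates when the spare capacity is insufficient.
    let before = map.capacity();
    map.reserve(1);
    println!("capacity before/after reserve(1): {} / {}", before, map.capacity());
}
```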
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap = HashMap::with_capacity(100); + /// map.insert(1, 2); + /// map.insert(3, 4); + /// assert!(map.capacity() >= 100); + /// map.shrink_to_fit(); + /// assert!(map.capacity() >= 2); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn shrink_to_fit(&mut self) { + let new_raw_cap = self.resize_policy.raw_capacity(self.len()); + if self.raw_capacity() != new_raw_cap { + let old_table = replace(&mut self.table, RawTable::new(new_raw_cap)); + let old_size = old_table.size(); + + // Shrink the table. Naive algorithm for resizing: + for (h, k, v) in old_table.into_iter() { + self.insert_hashed_nocheck(h, k, v); + } + + debug_assert_eq!(self.table.size(), old_size); + } + } + + /// Insert a pre-hashed key-value pair, without first checking + /// that there's enough room in the buckets. Returns a reference to the + /// newly insert value. + /// + /// If the key already exists, the hashtable will be returned untouched + /// and a reference to the existing element will be returned. + fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { + let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k); + match entry { + Some(Occupied(mut elem)) => Some(elem.insert(v)), + Some(Vacant(elem)) => { + elem.insert(v); + None + } + None => unreachable!(), + } + } + + /// An iterator visiting all keys in arbitrary order. + /// Iterator element type is `&'a K`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for key in map.keys() { + /// println!("{}", key); + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn keys(&self) -> Keys { + Keys { inner: self.iter() } + } + + /// An iterator visiting all values in arbitrary order. + /// Iterator element type is `&'a V`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for val in map.values() { + /// println!("{}", val); + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn values(&self) -> Values { + Values { inner: self.iter() } + } + + /// An iterator visiting all values mutably in arbitrary order. + /// Iterator element type is `&'a mut V`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for val in map.values_mut() { + /// *val = *val + 10; + /// } + /// + /// for val in map.values() { + /// println!("{}", val); + /// } + /// ``` + #[stable(feature = "map_values_mut", since = "1.10.0")] + pub fn values_mut(&mut self) -> ValuesMut { + ValuesMut { inner: self.iter_mut() } + } + + /// An iterator visiting all key-value pairs in arbitrary order. + /// Iterator element type is `(&'a K, &'a V)`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for (key, val) in map.iter() { + /// println!("key: {} val: {}", key, val); + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn iter(&self) -> Iter { + Iter { inner: self.table.iter() } + } + + /// An iterator visiting all key-value pairs in arbitrary order, + /// with mutable references to the values. + /// Iterator element type is `(&'a K, &'a mut V)`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// // Update all values + /// for (_, val) in map.iter_mut() { + /// *val *= 2; + /// } + /// + /// for (key, val) in &map { + /// println!("key: {} val: {}", key, val); + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn iter_mut(&mut self) -> IterMut { + IterMut { inner: self.table.iter_mut() } + } + + /// Gets the given key's corresponding entry in the map for in-place manipulation. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut letters = HashMap::new(); + /// + /// for ch in "a short treatise on fungi".chars() { + /// let counter = letters.entry(ch).or_insert(0); + /// *counter += 1; + /// } + /// + /// assert_eq!(letters[&'s'], 2); + /// assert_eq!(letters[&'t'], 3); + /// assert_eq!(letters[&'u'], 1); + /// assert_eq!(letters.get(&'y'), None); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn entry(&mut self, key: K) -> Entry { + // Gotta resize now. + self.reserve(1); + self.search_mut(&key).into_entry(key).expect("unreachable") + } + + /// Returns the number of elements in the map. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// assert_eq!(a.len(), 0); + /// a.insert(1, "a"); + /// assert_eq!(a.len(), 1); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn len(&self) -> usize { + self.table.size() + } + + /// Returns true if the map contains no elements. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// assert!(a.is_empty()); + /// a.insert(1, "a"); + /// assert!(!a.is_empty()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Clears the map, returning all key-value pairs as an iterator. Keeps the + /// allocated memory for reuse. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// a.insert(1, "a"); + /// a.insert(2, "b"); + /// + /// for (k, v) in a.drain().take(1) { + /// assert!(k == 1 || k == 2); + /// assert!(v == "a" || v == "b"); + /// } + /// + /// assert!(a.is_empty()); + /// ``` + #[inline] + #[stable(feature = "drain", since = "1.6.0")] + pub fn drain(&mut self) -> Drain { + Drain { inner: self.table.drain() } + } + + /// Clears the map, removing all key-value pairs. Keeps the allocated memory + /// for reuse. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// a.insert(1, "a"); + /// a.clear(); + /// assert!(a.is_empty()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn clear(&mut self) { + self.drain(); + } + + /// Returns a reference to the value corresponding to the key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.get(&1), Some(&"a")); + /// assert_eq!(map.get(&2), None); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get(&self, k: &Q) -> Option<&V> + where K: Borrow, + Q: Hash + Eq + { + self.search(k).into_occupied_bucket().map(|bucket| bucket.into_refs().1) + } + + /// Returns true if the map contains a value for the specified key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.contains_key(&1), true); + /// assert_eq!(map.contains_key(&2), false); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn contains_key(&self, k: &Q) -> bool + where K: Borrow, + Q: Hash + Eq + { + self.search(k).into_occupied_bucket().is_some() + } + + /// Returns a mutable reference to the value corresponding to the key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// if let Some(x) = map.get_mut(&1) { + /// *x = "b"; + /// } + /// assert_eq!(map[&1], "b"); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_mut(&mut self, k: &Q) -> Option<&mut V> + where K: Borrow, + Q: Hash + Eq + { + self.search_mut(k).into_occupied_bucket().map(|bucket| bucket.into_mut_refs().1) + } + + /// Inserts a key-value pair into the map. + /// + /// If the map did not have this key present, `None` is returned. + /// + /// If the map did have this key present, the value is updated, and the old + /// value is returned. The key is not updated, though; this matters for + /// types that can be `==` without being identical. See the [module-level + /// documentation] for more. 
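The `Borrow`-based lookups described for `get`, `contains_key`, and `get_mut` can be illustrated with owned `String` keys queried through `&str` (a minimal sketch against plain `std`):

```
use std::collections::HashMap;

fn main() {
    // Keys are owned Strings...
    let mut map: HashMap<String, u32> = HashMap::new();
    map.insert("alpha".to_string(), 1);

    // ...but lookups can use &str, because String: Borrow<str> and the
    // Hash/Eq of the borrowed form match those of the owned form.
    assert_eq!(map.get("alpha"), Some(&1));
    assert!(map.contains_key("alpha"));
    if let Some(v) = map.get_mut("alpha") {
        *v += 1;
    }
    assert_eq!(map["alpha"], 2);
}
```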
+ /// + /// [module-level documentation]: index.html#insert-and-complex-keys + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// assert_eq!(map.insert(37, "a"), None); + /// assert_eq!(map.is_empty(), false); + /// + /// map.insert(37, "b"); + /// assert_eq!(map.insert(37, "c"), Some("b")); + /// assert_eq!(map[&37], "c"); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(&mut self, k: K, v: V) -> Option { + let hash = self.make_hash(&k); + self.reserve(1); + self.insert_hashed_nocheck(hash, k, v) + } + + /// Removes a key from the map, returning the value at the key if the key + /// was previously in the map. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.remove(&1), Some("a")); + /// assert_eq!(map.remove(&1), None); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn remove(&mut self, k: &Q) -> Option + where K: Borrow, + Q: Hash + Eq + { + if self.table.size() == 0 { + return None; + } + + self.search_mut(k).into_occupied_bucket().map(|bucket| pop_internal(bucket).1) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for HashMap + where K: Eq + Hash, + V: PartialEq, + S: BuildHasher +{ + fn eq(&self, other: &HashMap) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter().all(|(key, value)| other.get(key).map_or(false, |v| *value == *v)) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for HashMap + where K: Eq + Hash, + V: Eq, + S: BuildHasher +{ +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Debug for HashMap + where K: Eq + Hash + Debug, + V: Debug, + S: BuildHasher +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_map().entries(self.iter()).finish() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for HashMap + where K: Eq + Hash, + S: BuildHasher + Default +{ + /// Creates an empty `HashMap`, with the `Default` value for the hasher. + fn default() -> HashMap { + HashMap::with_hasher(Default::default()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, Q: ?Sized, V, S> Index<&'a Q> for HashMap + where K: Eq + Hash + Borrow, + Q: Eq + Hash, + S: BuildHasher +{ + type Output = V; + + #[inline] + fn index(&self, index: &Q) -> &V { + self.get(index).expect("no entry found for key") + } +} + +/// HashMap iterator. +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Iter<'a, K: 'a, V: 'a> { + inner: table::Iter<'a, K, V>, +} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> Clone for Iter<'a, K, V> { + fn clone(&self) -> Iter<'a, K, V> { + Iter { inner: self.inner.clone() } + } +} + +/// HashMap mutable values iterator. +#[stable(feature = "rust1", since = "1.0.0")] +pub struct IterMut<'a, K: 'a, V: 'a> { + inner: table::IterMut<'a, K, V>, +} + +/// HashMap move iterator. +#[stable(feature = "rust1", since = "1.0.0")] +pub struct IntoIter { + inner: table::IntoIter, +} + +/// HashMap keys iterator. 
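Following the `PartialEq` and `Index` impls above, a small sketch showing that map equality is order-independent and that indexing panics on a missing key ("no entry found for key"), so `get` is the right call when absence is expected:

```
use std::collections::HashMap;

fn main() {
    // Equality is order-independent: same keys mapping to equal values.
    let a: HashMap<&str, i32> = [("x", 1), ("y", 2)].iter().cloned().collect();
    let b: HashMap<&str, i32> = [("y", 2), ("x", 1)].iter().cloned().collect();
    assert_eq!(a, b);

    // Indexing would panic on a missing key; get() returns None instead.
    assert_eq!(a["x"], 1);
    assert_eq!(a.get("z"), None);
}
```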
+#[stable(feature = "rust1", since = "1.0.0")] +pub struct Keys<'a, K: 'a, V: 'a> { + inner: Iter<'a, K, V>, +} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> Clone for Keys<'a, K, V> { + fn clone(&self) -> Keys<'a, K, V> { + Keys { inner: self.inner.clone() } + } +} + +/// HashMap values iterator. +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Values<'a, K: 'a, V: 'a> { + inner: Iter<'a, K, V>, +} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> Clone for Values<'a, K, V> { + fn clone(&self) -> Values<'a, K, V> { + Values { inner: self.inner.clone() } + } +} + +/// HashMap drain iterator. +#[stable(feature = "drain", since = "1.6.0")] +pub struct Drain<'a, K: 'a, V: 'a> { + inner: table::Drain<'a, K, V>, +} + +/// Mutable HashMap values iterator. +#[stable(feature = "map_values_mut", since = "1.10.0")] +pub struct ValuesMut<'a, K: 'a, V: 'a> { + inner: IterMut<'a, K, V>, +} + +enum InternalEntry { + Occupied { elem: FullBucket }, + Vacant { + hash: SafeHash, + elem: VacantEntryState, + }, + TableIsEmpty, +} + +impl InternalEntry { + #[inline] + fn into_occupied_bucket(self) -> Option> { + match self { + InternalEntry::Occupied { elem } => Some(elem), + _ => None, + } + } +} + +impl<'a, K, V> InternalEntry> { + #[inline] + fn into_entry(self, key: K) -> Option> { + match self { + InternalEntry::Occupied { elem } => { + Some(Occupied(OccupiedEntry { + key: Some(key), + elem: elem, + })) + } + InternalEntry::Vacant { hash, elem } => { + Some(Vacant(VacantEntry { + hash: hash, + key: key, + elem: elem, + })) + } + InternalEntry::TableIsEmpty => None, + } + } +} + +/// A view into a single location in a map, which may be vacant or occupied. +/// This enum is constructed from the [`entry`] method on [`HashMap`]. +/// +/// [`HashMap`]: struct.HashMap.html +/// [`entry`]: struct.HashMap.html#method.entry +#[stable(feature = "rust1", since = "1.0.0")] +pub enum Entry<'a, K: 'a, V: 'a> { + /// An occupied Entry. + #[stable(feature = "rust1", since = "1.0.0")] + Occupied(#[stable(feature = "rust1", since = "1.0.0")] + OccupiedEntry<'a, K, V>), + + /// A vacant Entry. + #[stable(feature = "rust1", since = "1.0.0")] + Vacant(#[stable(feature = "rust1", since = "1.0.0")] + VacantEntry<'a, K, V>), +} + +#[stable(feature= "debug_hash_map", since = "1.12.0")] +impl<'a, K: 'a + Debug, V: 'a + Debug> Debug for Entry<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Vacant(ref v) => { + f.debug_tuple("Entry") + .field(v) + .finish() + } + Occupied(ref o) => { + f.debug_tuple("Entry") + .field(o) + .finish() + } + } + } +} + +/// A view into a single occupied location in a HashMap. +/// It is part of the [`Entry`] enum. +/// +/// [`Entry`]: enum.Entry.html +#[stable(feature = "rust1", since = "1.0.0")] +pub struct OccupiedEntry<'a, K: 'a, V: 'a> { + key: Option, + elem: FullBucket>, +} + +#[stable(feature= "debug_hash_map", since = "1.12.0")] +impl<'a, K: 'a + Debug, V: 'a + Debug> Debug for OccupiedEntry<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("OccupiedEntry") + .field("key", self.key()) + .field("value", self.get()) + .finish() + } +} + +/// A view into a single empty location in a HashMap. +/// It is part of the [`Entry`] enum. 
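Beyond `or_insert`, the `Entry` enum itself can be matched on; a minimal sketch of handling the `Occupied` and `Vacant` arms explicitly (assuming plain `std`):

```
use std::collections::HashMap;
use std::collections::hash_map::Entry;

fn main() {
    let mut counts: HashMap<&str, u32> = HashMap::new();
    counts.insert("seen", 1);

    // entry() hands back one of the two variants; matching makes the
    // occupied and vacant cases explicit instead of going through or_insert.
    for &word in &["seen", "new"] {
        match counts.entry(word) {
            Entry::Occupied(mut o) => {
                *o.get_mut() += 1;
            }
            Entry::Vacant(v) => {
                v.insert(1);
            }
        }
    }

    assert_eq!(counts["seen"], 2);
    assert_eq!(counts["new"], 1);
}
```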
+/// +/// [`Entry`]: enum.Entry.html +#[stable(feature = "rust1", since = "1.0.0")] +pub struct VacantEntry<'a, K: 'a, V: 'a> { + hash: SafeHash, + key: K, + elem: VacantEntryState>, +} + +#[stable(feature= "debug_hash_map", since = "1.12.0")] +impl<'a, K: 'a + Debug, V: 'a> Debug for VacantEntry<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_tuple("VacantEntry") + .field(self.key()) + .finish() + } +} + +/// Possible states of a VacantEntry. +enum VacantEntryState { + /// The index is occupied, but the key to insert has precedence, + /// and will kick the current one out on insertion. + NeqElem(FullBucket, usize), + /// The index is genuinely vacant. + NoElem(EmptyBucket), +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V, S> IntoIterator for &'a HashMap + where K: Eq + Hash, + S: BuildHasher +{ + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Iter<'a, K, V> { + self.iter() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V, S> IntoIterator for &'a mut HashMap + where K: Eq + Hash, + S: BuildHasher +{ + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(mut self) -> IterMut<'a, K, V> { + self.iter_mut() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl IntoIterator for HashMap + where K: Eq + Hash, + S: BuildHasher +{ + type Item = (K, V); + type IntoIter = IntoIter; + + /// Creates a consuming iterator, that is, one that moves each key-value + /// pair out of the map in arbitrary order. The map cannot be used after + /// calling this. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// // Not possible with .iter() + /// let vec: Vec<(&str, isize)> = map.into_iter().collect(); + /// ``` + fn into_iter(self) -> IntoIter { + IntoIter { inner: self.table.into_iter() } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + + #[inline] + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} + +#[unstable(feature = "fused", issue = "35602")] +impl<'a, K, V> FusedIterator for Iter<'a, K, V> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + #[inline] + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} +#[unstable(feature = "fused", issue = "35602")] +impl<'a, K, V> FusedIterator for IterMut<'a, K, V> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for IntoIter { + type Item = (K, V); + + #[inline] + fn next(&mut self) -> Option<(K, V)> { + self.inner.next().map(|(_, k, v)| (k, v)) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ExactSizeIterator for IntoIter { + 
#[inline] + fn len(&self) -> usize { + self.inner.len() + } +} +#[unstable(feature = "fused", issue = "35602")] +impl FusedIterator for IntoIter {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> Iterator for Keys<'a, K, V> { + type Item = &'a K; + + #[inline] + fn next(&mut self) -> Option<(&'a K)> { + self.inner.next().map(|(k, _)| k) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> ExactSizeIterator for Keys<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} +#[unstable(feature = "fused", issue = "35602")] +impl<'a, K, V> FusedIterator for Keys<'a, K, V> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> Iterator for Values<'a, K, V> { + type Item = &'a V; + + #[inline] + fn next(&mut self) -> Option<(&'a V)> { + self.inner.next().map(|(_, v)| v) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K, V> ExactSizeIterator for Values<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} +#[unstable(feature = "fused", issue = "35602")] +impl<'a, K, V> FusedIterator for Values<'a, K, V> {} + +#[stable(feature = "map_values_mut", since = "1.10.0")] +impl<'a, K, V> Iterator for ValuesMut<'a, K, V> { + type Item = &'a mut V; + + #[inline] + fn next(&mut self) -> Option<(&'a mut V)> { + self.inner.next().map(|(_, v)| v) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +#[stable(feature = "map_values_mut", since = "1.10.0")] +impl<'a, K, V> ExactSizeIterator for ValuesMut<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} +#[unstable(feature = "fused", issue = "35602")] +impl<'a, K, V> FusedIterator for ValuesMut<'a, K, V> {} + +#[stable(feature = "drain", since = "1.6.0")] +impl<'a, K, V> Iterator for Drain<'a, K, V> { + type Item = (K, V); + + #[inline] + fn next(&mut self) -> Option<(K, V)> { + self.inner.next().map(|(_, k, v)| (k, v)) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +#[stable(feature = "drain", since = "1.6.0")] +impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} +#[unstable(feature = "fused", issue = "35602")] +impl<'a, K, V> FusedIterator for Drain<'a, K, V> {} + +impl<'a, K, V> Entry<'a, K, V> { + #[stable(feature = "rust1", since = "1.0.0")] + /// Ensures a value is in the entry by inserting the default if empty, and returns + /// a mutable reference to the value in the entry. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// assert_eq!(map["poneyland"], 12); + /// + /// *map.entry("poneyland").or_insert(12) += 10; + /// assert_eq!(map["poneyland"], 22); + /// ``` + pub fn or_insert(self, default: V) -> &'a mut V { + match self { + Occupied(entry) => entry.into_mut(), + Vacant(entry) => entry.insert(default), + } + } + + #[stable(feature = "rust1", since = "1.0.0")] + /// Ensures a value is in the entry by inserting the result of the default function if empty, + /// and returns a mutable reference to the value in the entry. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, String> = HashMap::new(); + /// let s = "hoho".to_owned(); + /// + /// map.entry("poneyland").or_insert_with(|| s); + /// + /// assert_eq!(map["poneyland"], "hoho".to_owned()); + /// ``` + pub fn or_insert_with V>(self, default: F) -> &'a mut V { + match self { + Occupied(entry) => entry.into_mut(), + Vacant(entry) => entry.insert(default()), + } + } + + /// Returns a reference to this entry's key. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// assert_eq!(map.entry("poneyland").key(), &"poneyland"); + /// ``` + #[stable(feature = "map_entry_keys", since = "1.10.0")] + pub fn key(&self) -> &K { + match *self { + Occupied(ref entry) => entry.key(), + Vacant(ref entry) => entry.key(), + } + } +} + +impl<'a, K, V> OccupiedEntry<'a, K, V> { + /// Gets a reference to the key in the entry. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// assert_eq!(map.entry("poneyland").key(), &"poneyland"); + /// ``` + #[stable(feature = "map_entry_keys", since = "1.10.0")] + pub fn key(&self) -> &K { + self.elem.read().0 + } + + /// Deprecated, renamed to `remove_entry` + #[unstable(feature = "map_entry_recover_keys", issue = "34285")] + #[rustc_deprecated(since = "1.12.0", reason = "renamed to `remove_entry`")] + pub fn remove_pair(self) -> (K, V) { + self.remove_entry() + } + + /// Take the ownership of the key and value from the map. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// if let Entry::Occupied(o) = map.entry("poneyland") { + /// // We delete the entry from the map. + /// o.remove_entry(); + /// } + /// + /// assert_eq!(map.contains_key("poneyland"), false); + /// ``` + #[stable(feature = "map_entry_recover_keys2", since = "1.12.0")] + pub fn remove_entry(self) -> (K, V) { + pop_internal(self.elem) + } + + /// Gets a reference to the value in the entry. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// if let Entry::Occupied(o) = map.entry("poneyland") { + /// assert_eq!(o.get(), &12); + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get(&self) -> &V { + self.elem.read().1 + } + + /// Gets a mutable reference to the value in the entry. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// assert_eq!(map["poneyland"], 12); + /// if let Entry::Occupied(mut o) = map.entry("poneyland") { + /// *o.get_mut() += 10; + /// } + /// + /// assert_eq!(map["poneyland"], 22); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_mut(&mut self) -> &mut V { + self.elem.read_mut().1 + } + + /// Converts the OccupiedEntry into a mutable reference to the value in the entry + /// with a lifetime bound to the map itself. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// assert_eq!(map["poneyland"], 12); + /// if let Entry::Occupied(o) = map.entry("poneyland") { + /// *o.into_mut() += 10; + /// } + /// + /// assert_eq!(map["poneyland"], 22); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_mut(self) -> &'a mut V { + self.elem.into_mut_refs().1 + } + + /// Sets the value of the entry, and returns the entry's old value. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// if let Entry::Occupied(mut o) = map.entry("poneyland") { + /// assert_eq!(o.insert(15), 12); + /// } + /// + /// assert_eq!(map["poneyland"], 15); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(&mut self, mut value: V) -> V { + let old_value = self.get_mut(); + mem::swap(&mut value, old_value); + value + } + + /// Takes the value out of the entry, and returns it. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// if let Entry::Occupied(o) = map.entry("poneyland") { + /// assert_eq!(o.remove(), 12); + /// } + /// + /// assert_eq!(map.contains_key("poneyland"), false); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn remove(self) -> V { + pop_internal(self.elem).1 + } + + /// Returns a key that was used for search. + /// + /// The key was retained for further use. + fn take_key(&mut self) -> Option { + self.key.take() + } +} + +impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { + /// Gets a reference to the key that would be used when inserting a value + /// through the `VacantEntry`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// assert_eq!(map.entry("poneyland").key(), &"poneyland"); + /// ``` + #[stable(feature = "map_entry_keys", since = "1.10.0")] + pub fn key(&self) -> &K { + &self.key + } + + /// Take ownership of the key. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// + /// if let Entry::Vacant(v) = map.entry("poneyland") { + /// v.into_key(); + /// } + /// ``` + #[stable(feature = "map_entry_recover_keys2", since = "1.12.0")] + pub fn into_key(self) -> K { + self.key + } + + /// Sets the value of the entry with the VacantEntry's key, + /// and returns a mutable reference to it. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// + /// if let Entry::Vacant(o) = map.entry("poneyland") { + /// o.insert(37); + /// } + /// assert_eq!(map["poneyland"], 37); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(self, value: V) -> &'a mut V { + match self.elem { + NeqElem(bucket, disp) => robin_hood(bucket, disp, self.hash, self.key, value), + NoElem(bucket) => bucket.put(self.hash, self.key, value).into_mut_refs().1, + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl FromIterator<(K, V)> for HashMap + where K: Eq + Hash, + S: BuildHasher + Default +{ + fn from_iter>(iter: T) -> HashMap { + let mut map = HashMap::with_hasher(Default::default()); + map.extend(iter); + map + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Extend<(K, V)> for HashMap + where K: Eq + Hash, + S: BuildHasher +{ + fn extend>(&mut self, iter: T) { + // Keys may be already present or show multiple times in the iterator. + // Reserve the entire hint lower bound if the map is empty. + // Otherwise reserve half the hint (rounded up), so the map + // will only resize twice in the worst case. + let iter = iter.into_iter(); + let reserve = if self.is_empty() { + iter.size_hint().0 + } else { + (iter.size_hint().0 + 1) / 2 + }; + self.reserve(reserve); + for (k, v) in iter { + self.insert(k, v); + } + } +} + +#[stable(feature = "hash_extend_copy", since = "1.4.0")] +impl<'a, K, V, S> Extend<(&'a K, &'a V)> for HashMap + where K: Eq + Hash + Copy, + V: Copy, + S: BuildHasher +{ + fn extend>(&mut self, iter: T) { + self.extend(iter.into_iter().map(|(&key, &value)| (key, value))); + } +} + +/// `RandomState` is the default state for [`HashMap`] types. +/// +/// A particular instance `RandomState` will create the same instances of +/// [`Hasher`], but the hashers created by two different `RandomState` +/// instances are unlikely to produce the same result for the same values. +/// +/// [`HashMap`]: struct.HashMap.html +/// [`Hasher`]: ../../hash/trait.Hasher.html +/// +/// # Examples +/// +/// ``` +/// use std::collections::HashMap; +/// use std::collections::hash_map::RandomState; +/// +/// let s = RandomState::new(); +/// let mut map = HashMap::with_hasher(s); +/// map.insert(1, 2); +/// ``` +#[derive(Clone)] +#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] +pub struct RandomState { + k0: u64, + k1: u64, +} + +impl RandomState { + /// Constructs a new `RandomState` that is initialized with random keys. + /// + /// # Examples + /// + /// ``` + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// ``` + #[inline] + #[allow(deprecated)] + // rand + #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] + pub fn new() -> RandomState { + // Historically this function did not cache keys from the OS and instead + // simply always called `rand::thread_rng().gen()` twice. In #31356 it + // was discovered, however, that because we re-seed the thread-local RNG + // from the OS periodically that this can cause excessive slowdown when + // many hash maps are created on a thread. To solve this performance + // trap we cache the first set of randomly generated keys per-thread. + // + // Later in #36481 it was discovered that exposing a deterministic + // iteration order allows a form of DOS attack. 
To counter that we + // increment one of the seeds on every RandomState creation, giving + // every corresponding HashMap a different iteration order. + thread_local!(static KEYS: Cell<(u64, u64)> = { + let r = rand::OsRng::new(); + let mut r = r.expect("failed to create an OS RNG"); + Cell::new((r.gen(), r.gen())) + }); + + KEYS.with(|keys| { + let (k0, k1) = keys.get(); + keys.set((k0.wrapping_add(1), k1)); + RandomState { k0: k0, k1: k1 } + }) + } +} + +#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] +impl BuildHasher for RandomState { + type Hasher = DefaultHasher; + #[inline] + #[allow(deprecated)] + fn build_hasher(&self) -> DefaultHasher { + DefaultHasher(SipHasher13::new_with_keys(self.k0, self.k1)) + } +} + +/// The default [`Hasher`] used by [`RandomState`]. +/// +/// The internal algorithm is not specified, and so it and its hashes should +/// not be relied upon over releases. +/// +/// [`RandomState`]: struct.RandomState.html +/// [`Hasher`]: ../../hash/trait.Hasher.html +#[stable(feature = "hashmap_default_hasher", since = "1.13.0")] +#[allow(deprecated)] +#[derive(Debug)] +pub struct DefaultHasher(SipHasher13); + +impl DefaultHasher { + /// Creates a new `DefaultHasher`. + /// + /// This hasher is not guaranteed to be the same as all other + /// `DefaultHasher` instances, but is the same as all other `DefaultHasher` + /// instances created through `new` or `default`. + #[stable(feature = "hashmap_default_hasher", since = "1.13.0")] + #[allow(deprecated)] + pub fn new() -> DefaultHasher { + DefaultHasher(SipHasher13::new_with_keys(0, 0)) + } +} + +#[stable(feature = "hashmap_default_hasher", since = "1.13.0")] +impl Default for DefaultHasher { + /// Creates a new `DefaultHasher` using [`DefaultHasher::new`]. See + /// [`DefaultHasher::new`] documentation for more information. + /// + /// [`DefaultHasher::new`]: #method.new + fn default() -> DefaultHasher { + DefaultHasher::new() + } +} + +#[stable(feature = "hashmap_default_hasher", since = "1.13.0")] +impl Hasher for DefaultHasher { + #[inline] + fn write(&mut self, msg: &[u8]) { + self.0.write(msg) + } + + #[inline] + fn finish(&self) -> u64 { + self.0.finish() + } +} + +#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] +impl Default for RandomState { + /// Constructs a new `RandomState`. 
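    //
    // NOTE (editorial sketch, not part of the original patch): a hypothetical
    // doc example showing the `RandomState` -> `DefaultHasher` pipeline through
    // the `BuildHasher` impl above, for parity with the other examples here.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::collections::hash_map::RandomState;
    /// use std::hash::{BuildHasher, Hash, Hasher};
    ///
    /// // `default()` behaves like `RandomState::new()`: per-thread random keys.
    /// let s = RandomState::default();
    ///
    /// // The state builds a `DefaultHasher` that can hash any `Hash` value.
    /// let mut hasher = s.build_hasher();
    /// 42i32.hash(&mut hasher);
    /// let _hash: u64 = hasher.finish();
    /// ```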
+ #[inline] + fn default() -> RandomState { + RandomState::new() + } +} + +impl super::Recover for HashMap + where K: Eq + Hash + Borrow, + S: BuildHasher, + Q: Eq + Hash +{ + type Key = K; + + fn get(&self, key: &Q) -> Option<&K> { + self.search(key).into_occupied_bucket().map(|bucket| bucket.into_refs().0) + } + + fn take(&mut self, key: &Q) -> Option { + if self.table.size() == 0 { + return None; + } + + self.search_mut(key).into_occupied_bucket().map(|bucket| pop_internal(bucket).0) + } + + fn replace(&mut self, key: K) -> Option { + self.reserve(1); + + match self.entry(key) { + Occupied(mut occupied) => { + let key = occupied.take_key().unwrap(); + Some(mem::replace(occupied.elem.read_mut().0, key)) + } + Vacant(vacant) => { + vacant.insert(()); + None + } + } + } +} + +#[allow(dead_code)] +fn assert_covariance() { + fn map_key<'new>(v: HashMap<&'static str, u8>) -> HashMap<&'new str, u8> { + v + } + fn map_val<'new>(v: HashMap) -> HashMap { + v + } + fn iter_key<'a, 'new>(v: Iter<'a, &'static str, u8>) -> Iter<'a, &'new str, u8> { + v + } + fn iter_val<'a, 'new>(v: Iter<'a, u8, &'static str>) -> Iter<'a, u8, &'new str> { + v + } + fn into_iter_key<'new>(v: IntoIter<&'static str, u8>) -> IntoIter<&'new str, u8> { + v + } + fn into_iter_val<'new>(v: IntoIter) -> IntoIter { + v + } + fn keys_key<'a, 'new>(v: Keys<'a, &'static str, u8>) -> Keys<'a, &'new str, u8> { + v + } + fn keys_val<'a, 'new>(v: Keys<'a, u8, &'static str>) -> Keys<'a, u8, &'new str> { + v + } + fn values_key<'a, 'new>(v: Values<'a, &'static str, u8>) -> Values<'a, &'new str, u8> { + v + } + fn values_val<'a, 'new>(v: Values<'a, u8, &'static str>) -> Values<'a, u8, &'new str> { + v + } + fn drain<'new>(d: Drain<'static, &'static str, &'static str>) + -> Drain<'new, &'new str, &'new str> { + d + } +} + +#[cfg(test)] +mod test_map { + use super::HashMap; + use super::Entry::{Occupied, Vacant}; + use super::RandomState; + use cell::RefCell; + use rand::{thread_rng, Rng}; + + #[test] + fn test_zero_capacities() { + type HM = HashMap; + + let m = HM::new(); + assert_eq!(m.capacity(), 0); + + let m = HM::default(); + assert_eq!(m.capacity(), 0); + + let m = HM::with_hasher(RandomState::new()); + assert_eq!(m.capacity(), 0); + + let m = HM::with_capacity(0); + assert_eq!(m.capacity(), 0); + + let m = HM::with_capacity_and_hasher(0, RandomState::new()); + assert_eq!(m.capacity(), 0); + + let mut m = HM::new(); + m.insert(1, 1); + m.insert(2, 2); + m.remove(&1); + m.remove(&2); + m.shrink_to_fit(); + assert_eq!(m.capacity(), 0); + + let mut m = HM::new(); + m.reserve(0); + assert_eq!(m.capacity(), 0); + } + + #[test] + fn test_create_capacity_zero() { + let mut m = HashMap::with_capacity(0); + + assert!(m.insert(1, 1).is_none()); + + assert!(m.contains_key(&1)); + assert!(!m.contains_key(&0)); + } + + #[test] + fn test_insert() { + let mut m = HashMap::new(); + assert_eq!(m.len(), 0); + assert!(m.insert(1, 2).is_none()); + assert_eq!(m.len(), 1); + assert!(m.insert(2, 4).is_none()); + assert_eq!(m.len(), 2); + assert_eq!(*m.get(&1).unwrap(), 2); + assert_eq!(*m.get(&2).unwrap(), 4); + } + + #[test] + fn test_clone() { + let mut m = HashMap::new(); + assert_eq!(m.len(), 0); + assert!(m.insert(1, 2).is_none()); + assert_eq!(m.len(), 1); + assert!(m.insert(2, 4).is_none()); + assert_eq!(m.len(), 2); + let m2 = m.clone(); + assert_eq!(*m2.get(&1).unwrap(), 2); + assert_eq!(*m2.get(&2).unwrap(), 4); + assert_eq!(m2.len(), 2); + } + + thread_local! 
{ static DROP_VECTOR: RefCell> = RefCell::new(Vec::new()) } + + #[derive(Hash, PartialEq, Eq)] + struct Dropable { + k: usize, + } + + impl Dropable { + fn new(k: usize) -> Dropable { + DROP_VECTOR.with(|slot| { + slot.borrow_mut()[k] += 1; + }); + + Dropable { k: k } + } + } + + impl Drop for Dropable { + fn drop(&mut self) { + DROP_VECTOR.with(|slot| { + slot.borrow_mut()[self.k] -= 1; + }); + } + } + + impl Clone for Dropable { + fn clone(&self) -> Dropable { + Dropable::new(self.k) + } + } + + #[test] + fn test_drops() { + DROP_VECTOR.with(|slot| { + *slot.borrow_mut() = vec![0; 200]; + }); + + { + let mut m = HashMap::new(); + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 0); + } + }); + + for i in 0..100 { + let d1 = Dropable::new(i); + let d2 = Dropable::new(i + 100); + m.insert(d1, d2); + } + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 1); + } + }); + + for i in 0..50 { + let k = Dropable::new(i); + let v = m.remove(&k); + + assert!(v.is_some()); + + DROP_VECTOR.with(|v| { + assert_eq!(v.borrow()[i], 1); + assert_eq!(v.borrow()[i+100], 1); + }); + } + + DROP_VECTOR.with(|v| { + for i in 0..50 { + assert_eq!(v.borrow()[i], 0); + assert_eq!(v.borrow()[i+100], 0); + } + + for i in 50..100 { + assert_eq!(v.borrow()[i], 1); + assert_eq!(v.borrow()[i+100], 1); + } + }); + } + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 0); + } + }); + } + + #[test] + fn test_into_iter_drops() { + DROP_VECTOR.with(|v| { + *v.borrow_mut() = vec![0; 200]; + }); + + let hm = { + let mut hm = HashMap::new(); + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 0); + } + }); + + for i in 0..100 { + let d1 = Dropable::new(i); + let d2 = Dropable::new(i + 100); + hm.insert(d1, d2); + } + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 1); + } + }); + + hm + }; + + // By the way, ensure that cloning doesn't screw up the dropping. + drop(hm.clone()); + + { + let mut half = hm.into_iter().take(50); + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 1); + } + }); + + for _ in half.by_ref() {} + + DROP_VECTOR.with(|v| { + let nk = (0..100) + .filter(|&i| v.borrow()[i] == 1) + .count(); + + let nv = (0..100) + .filter(|&i| v.borrow()[i + 100] == 1) + .count(); + + assert_eq!(nk, 50); + assert_eq!(nv, 50); + }); + }; + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 0); + } + }); + } + + #[test] + fn test_empty_remove() { + let mut m: HashMap = HashMap::new(); + assert_eq!(m.remove(&0), None); + } + + #[test] + fn test_empty_entry() { + let mut m: HashMap = HashMap::new(); + match m.entry(0) { + Occupied(_) => panic!(), + Vacant(_) => {} + } + assert!(*m.entry(0).or_insert(true)); + assert_eq!(m.len(), 1); + } + + #[test] + fn test_empty_iter() { + let mut m: HashMap = HashMap::new(); + assert_eq!(m.drain().next(), None); + assert_eq!(m.keys().next(), None); + assert_eq!(m.values().next(), None); + assert_eq!(m.values_mut().next(), None); + assert_eq!(m.iter().next(), None); + assert_eq!(m.iter_mut().next(), None); + assert_eq!(m.len(), 0); + assert!(m.is_empty()); + assert_eq!(m.into_iter().next(), None); + } + + #[test] + fn test_lots_of_insertions() { + let mut m = HashMap::new(); + + // Try this a few times to make sure we never screw up the hashmap's + // internal state. 
+ for _ in 0..10 { + assert!(m.is_empty()); + + for i in 1..1001 { + assert!(m.insert(i, i).is_none()); + + for j in 1..i + 1 { + let r = m.get(&j); + assert_eq!(r, Some(&j)); + } + + for j in i + 1..1001 { + let r = m.get(&j); + assert_eq!(r, None); + } + } + + for i in 1001..2001 { + assert!(!m.contains_key(&i)); + } + + // remove forwards + for i in 1..1001 { + assert!(m.remove(&i).is_some()); + + for j in 1..i + 1 { + assert!(!m.contains_key(&j)); + } + + for j in i + 1..1001 { + assert!(m.contains_key(&j)); + } + } + + for i in 1..1001 { + assert!(!m.contains_key(&i)); + } + + for i in 1..1001 { + assert!(m.insert(i, i).is_none()); + } + + // remove backwards + for i in (1..1001).rev() { + assert!(m.remove(&i).is_some()); + + for j in i..1001 { + assert!(!m.contains_key(&j)); + } + + for j in 1..i { + assert!(m.contains_key(&j)); + } + } + } + } + + #[test] + fn test_find_mut() { + let mut m = HashMap::new(); + assert!(m.insert(1, 12).is_none()); + assert!(m.insert(2, 8).is_none()); + assert!(m.insert(5, 14).is_none()); + let new = 100; + match m.get_mut(&5) { + None => panic!(), + Some(x) => *x = new, + } + assert_eq!(m.get(&5), Some(&new)); + } + + #[test] + fn test_insert_overwrite() { + let mut m = HashMap::new(); + assert!(m.insert(1, 2).is_none()); + assert_eq!(*m.get(&1).unwrap(), 2); + assert!(!m.insert(1, 3).is_none()); + assert_eq!(*m.get(&1).unwrap(), 3); + } + + #[test] + fn test_insert_conflicts() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1, 2).is_none()); + assert!(m.insert(5, 3).is_none()); + assert!(m.insert(9, 4).is_none()); + assert_eq!(*m.get(&9).unwrap(), 4); + assert_eq!(*m.get(&5).unwrap(), 3); + assert_eq!(*m.get(&1).unwrap(), 2); + } + + #[test] + fn test_conflict_remove() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1, 2).is_none()); + assert_eq!(*m.get(&1).unwrap(), 2); + assert!(m.insert(5, 3).is_none()); + assert_eq!(*m.get(&1).unwrap(), 2); + assert_eq!(*m.get(&5).unwrap(), 3); + assert!(m.insert(9, 4).is_none()); + assert_eq!(*m.get(&1).unwrap(), 2); + assert_eq!(*m.get(&5).unwrap(), 3); + assert_eq!(*m.get(&9).unwrap(), 4); + assert!(m.remove(&1).is_some()); + assert_eq!(*m.get(&9).unwrap(), 4); + assert_eq!(*m.get(&5).unwrap(), 3); + } + + #[test] + fn test_is_empty() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1, 2).is_none()); + assert!(!m.is_empty()); + assert!(m.remove(&1).is_some()); + assert!(m.is_empty()); + } + + #[test] + fn test_pop() { + let mut m = HashMap::new(); + m.insert(1, 2); + assert_eq!(m.remove(&1), Some(2)); + assert_eq!(m.remove(&1), None); + } + + #[test] + fn test_iterate() { + let mut m = HashMap::with_capacity(4); + for i in 0..32 { + assert!(m.insert(i, i*2).is_none()); + } + assert_eq!(m.len(), 32); + + let mut observed: u32 = 0; + + for (k, v) in &m { + assert_eq!(*v, *k * 2); + observed |= 1 << *k; + } + assert_eq!(observed, 0xFFFF_FFFF); + } + + #[test] + fn test_keys() { + let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; + let map: HashMap<_, _> = vec.into_iter().collect(); + let keys: Vec<_> = map.keys().cloned().collect(); + assert_eq!(keys.len(), 3); + assert!(keys.contains(&1)); + assert!(keys.contains(&2)); + assert!(keys.contains(&3)); + } + + #[test] + fn test_values() { + let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; + let map: HashMap<_, _> = vec.into_iter().collect(); + let values: Vec<_> = map.values().cloned().collect(); + assert_eq!(values.len(), 3); + assert!(values.contains(&'a')); + assert!(values.contains(&'b')); + assert!(values.contains(&'c')); + } + 
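    // NOTE (editorial sketch, not part of the original patch): an illustrative
    // test showing the `entry` + `or_insert` counting pattern that the Entry
    // API defined earlier in map.rs is designed for; the test name and data
    // are hypothetical.
    #[test]
    fn test_entry_counting_sketch() {
        let mut counts = HashMap::new();
        for word in ["a", "b", "a"].iter() {
            // A vacant entry starts at 0; an occupied entry is bumped in place.
            *counts.entry(*word).or_insert(0) += 1;
        }
        assert_eq!(counts["a"], 2);
        assert_eq!(counts["b"], 1);
    }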
+ #[test] + fn test_values_mut() { + let vec = vec![(1, 1), (2, 2), (3, 3)]; + let mut map: HashMap<_, _> = vec.into_iter().collect(); + for value in map.values_mut() { + *value = (*value) * 2 + } + let values: Vec<_> = map.values().cloned().collect(); + assert_eq!(values.len(), 3); + assert!(values.contains(&2)); + assert!(values.contains(&4)); + assert!(values.contains(&6)); + } + + #[test] + fn test_find() { + let mut m = HashMap::new(); + assert!(m.get(&1).is_none()); + m.insert(1, 2); + match m.get(&1) { + None => panic!(), + Some(v) => assert_eq!(*v, 2), + } + } + + #[test] + fn test_eq() { + let mut m1 = HashMap::new(); + m1.insert(1, 2); + m1.insert(2, 3); + m1.insert(3, 4); + + let mut m2 = HashMap::new(); + m2.insert(1, 2); + m2.insert(2, 3); + + assert!(m1 != m2); + + m2.insert(3, 4); + + assert_eq!(m1, m2); + } + + #[test] + fn test_show() { + let mut map = HashMap::new(); + let empty: HashMap = HashMap::new(); + + map.insert(1, 2); + map.insert(3, 4); + + let map_str = format!("{:?}", map); + + assert!(map_str == "{1: 2, 3: 4}" || + map_str == "{3: 4, 1: 2}"); + assert_eq!(format!("{:?}", empty), "{}"); + } + + #[test] + fn test_expand() { + let mut m = HashMap::new(); + + assert_eq!(m.len(), 0); + assert!(m.is_empty()); + + let mut i = 0; + let old_raw_cap = m.raw_capacity(); + while old_raw_cap == m.raw_capacity() { + m.insert(i, i); + i += 1; + } + + assert_eq!(m.len(), i); + assert!(!m.is_empty()); + } + + #[test] + fn test_behavior_resize_policy() { + let mut m = HashMap::new(); + + assert_eq!(m.len(), 0); + assert_eq!(m.raw_capacity(), 0); + assert!(m.is_empty()); + + m.insert(0, 0); + m.remove(&0); + assert!(m.is_empty()); + let initial_raw_cap = m.raw_capacity(); + m.reserve(initial_raw_cap); + let raw_cap = m.raw_capacity(); + + assert_eq!(raw_cap, initial_raw_cap * 2); + + let mut i = 0; + for _ in 0..raw_cap * 3 / 4 { + m.insert(i, i); + i += 1; + } + // three quarters full + + assert_eq!(m.len(), i); + assert_eq!(m.raw_capacity(), raw_cap); + + for _ in 0..raw_cap / 4 { + m.insert(i, i); + i += 1; + } + // half full + + let new_raw_cap = m.raw_capacity(); + assert_eq!(new_raw_cap, raw_cap * 2); + + for _ in 0..raw_cap / 2 - 1 { + i -= 1; + m.remove(&i); + assert_eq!(m.raw_capacity(), new_raw_cap); + } + // A little more than one quarter full. 
+ m.shrink_to_fit(); + assert_eq!(m.raw_capacity(), raw_cap); + // again, a little more than half full + for _ in 0..raw_cap / 2 - 1 { + i -= 1; + m.remove(&i); + } + m.shrink_to_fit(); + + assert_eq!(m.len(), i); + assert!(!m.is_empty()); + assert_eq!(m.raw_capacity(), initial_raw_cap); + } + + #[test] + fn test_reserve_shrink_to_fit() { + let mut m = HashMap::new(); + m.insert(0, 0); + m.remove(&0); + assert!(m.capacity() >= m.len()); + for i in 0..128 { + m.insert(i, i); + } + m.reserve(256); + + let usable_cap = m.capacity(); + for i in 128..(128 + 256) { + m.insert(i, i); + assert_eq!(m.capacity(), usable_cap); + } + + for i in 100..(128 + 256) { + assert_eq!(m.remove(&i), Some(i)); + } + m.shrink_to_fit(); + + assert_eq!(m.len(), 100); + assert!(!m.is_empty()); + assert!(m.capacity() >= m.len()); + + for i in 0..100 { + assert_eq!(m.remove(&i), Some(i)); + } + m.shrink_to_fit(); + m.insert(0, 0); + + assert_eq!(m.len(), 1); + assert!(m.capacity() >= m.len()); + assert_eq!(m.remove(&0), Some(0)); + } + + #[test] + fn test_from_iter() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let map: HashMap<_, _> = xs.iter().cloned().collect(); + + for &(k, v) in &xs { + assert_eq!(map.get(&k), Some(&v)); + } + } + + #[test] + fn test_size_hint() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let map: HashMap<_, _> = xs.iter().cloned().collect(); + + let mut iter = map.iter(); + + for _ in iter.by_ref().take(3) {} + + assert_eq!(iter.size_hint(), (3, Some(3))); + } + + #[test] + fn test_iter_len() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let map: HashMap<_, _> = xs.iter().cloned().collect(); + + let mut iter = map.iter(); + + for _ in iter.by_ref().take(3) {} + + assert_eq!(iter.len(), 3); + } + + #[test] + fn test_mut_size_hint() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let mut map: HashMap<_, _> = xs.iter().cloned().collect(); + + let mut iter = map.iter_mut(); + + for _ in iter.by_ref().take(3) {} + + assert_eq!(iter.size_hint(), (3, Some(3))); + } + + #[test] + fn test_iter_mut_len() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let mut map: HashMap<_, _> = xs.iter().cloned().collect(); + + let mut iter = map.iter_mut(); + + for _ in iter.by_ref().take(3) {} + + assert_eq!(iter.len(), 3); + } + + #[test] + fn test_index() { + let mut map = HashMap::new(); + + map.insert(1, 2); + map.insert(2, 1); + map.insert(3, 4); + + assert_eq!(map[&2], 1); + } + + #[test] + #[should_panic] + fn test_index_nonexistent() { + let mut map = HashMap::new(); + + map.insert(1, 2); + map.insert(2, 1); + map.insert(3, 4); + + map[&4]; + } + + #[test] + fn test_entry() { + let xs = [(1, 10), (2, 20), (3, 30), (4, 40), (5, 50), (6, 60)]; + + let mut map: HashMap<_, _> = xs.iter().cloned().collect(); + + // Existing key (insert) + match map.entry(1) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + assert_eq!(view.get(), &10); + assert_eq!(view.insert(100), 10); + } + } + assert_eq!(map.get(&1).unwrap(), &100); + assert_eq!(map.len(), 6); + + + // Existing key (update) + match map.entry(2) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + let v = view.get_mut(); + let new_v = (*v) * 10; + *v = new_v; + } + } + assert_eq!(map.get(&2).unwrap(), &200); + assert_eq!(map.len(), 6); + + // Existing key (take) + match map.entry(3) { + Vacant(_) => unreachable!(), + Occupied(view) => { + assert_eq!(view.remove(), 30); + } + } + assert_eq!(map.get(&3), None); + assert_eq!(map.len(), 5); + + + 
// Inexistent key (insert) + match map.entry(10) { + Occupied(_) => unreachable!(), + Vacant(view) => { + assert_eq!(*view.insert(1000), 1000); + } + } + assert_eq!(map.get(&10).unwrap(), &1000); + assert_eq!(map.len(), 6); + } + + #[test] + fn test_entry_take_doesnt_corrupt() { + #![allow(deprecated)] //rand + // Test for #19292 + fn check(m: &HashMap) { + for k in m.keys() { + assert!(m.contains_key(k), + "{} is in keys() but not in the map?", k); + } + } + + let mut m = HashMap::new(); + let mut rng = thread_rng(); + + // Populate the map with some items. + for _ in 0..50 { + let x = rng.gen_range(-10, 10); + m.insert(x, ()); + } + + for i in 0..1000 { + let x = rng.gen_range(-10, 10); + match m.entry(x) { + Vacant(_) => {} + Occupied(e) => { + println!("{}: remove {}", i, x); + e.remove(); + } + } + + check(&m); + } + } + + #[test] + fn test_extend_ref() { + let mut a = HashMap::new(); + a.insert(1, "one"); + let mut b = HashMap::new(); + b.insert(2, "two"); + b.insert(3, "three"); + + a.extend(&b); + + assert_eq!(a.len(), 3); + assert_eq!(a[&1], "one"); + assert_eq!(a[&2], "two"); + assert_eq!(a[&3], "three"); + } + + #[test] + fn test_capacity_not_less_than_len() { + let mut a = HashMap::new(); + let mut item = 0; + + for _ in 0..116 { + a.insert(item, 0); + item += 1; + } + + assert!(a.capacity() > a.len()); + + let free = a.capacity() - a.len(); + for _ in 0..free { + a.insert(item, 0); + item += 1; + } + + assert_eq!(a.len(), a.capacity()); + + // Insert at capacity should cause allocation. + a.insert(item, 0); + assert!(a.capacity() > a.len()); + } + + #[test] + fn test_occupied_entry_key() { + let mut a = HashMap::new(); + let key = "hello there"; + let value = "value goes here"; + assert!(a.is_empty()); + a.insert(key.clone(), value.clone()); + assert_eq!(a.len(), 1); + assert_eq!(a[key], value); + + match a.entry(key.clone()) { + Vacant(_) => panic!(), + Occupied(e) => assert_eq!(key, *e.key()), + } + assert_eq!(a.len(), 1); + assert_eq!(a[key], value); + } + + #[test] + fn test_vacant_entry_key() { + let mut a = HashMap::new(); + let key = "hello there"; + let value = "value goes here"; + + assert!(a.is_empty()); + match a.entry(key.clone()) { + Occupied(_) => panic!(), + Vacant(e) => { + assert_eq!(key, *e.key()); + e.insert(value.clone()); + } + } + assert_eq!(a.len(), 1); + assert_eq!(a[key], value); + } +} diff --git a/ctr-std/src/collections/hash/mod.rs b/ctr-std/src/collections/hash/mod.rs new file mode 100644 index 0000000..7a22bec --- /dev/null +++ b/ctr-std/src/collections/hash/mod.rs @@ -0,0 +1,24 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Unordered containers, implemented as hash-tables + +mod bench; +mod table; +pub mod map; +pub mod set; + +trait Recover { + type Key; + + fn get(&self, key: &Q) -> Option<&Self::Key>; + fn take(&mut self, key: &Q) -> Option; + fn replace(&mut self, key: Self::Key) -> Option; +} diff --git a/ctr-std/src/collections/hash/set.rs b/ctr-std/src/collections/hash/set.rs new file mode 100644 index 0000000..72af612 --- /dev/null +++ b/ctr-std/src/collections/hash/set.rs @@ -0,0 +1,1531 @@ +// Copyright 2014 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use borrow::Borrow; +use fmt; +use hash::{Hash, BuildHasher}; +use iter::{Chain, FromIterator, FusedIterator}; +use ops::{BitOr, BitAnd, BitXor, Sub}; + +use super::Recover; +use super::map::{self, HashMap, Keys, RandomState}; + +// Future Optimization (FIXME!) +// ============================= +// +// Iteration over zero sized values is a noop. There is no need +// for `bucket.val` in the case of HashSet. I suppose we would need HKT +// to get rid of it properly. + +/// An implementation of a hash set using the underlying representation of a +/// HashMap where the value is (). +/// +/// As with the `HashMap` type, a `HashSet` requires that the elements +/// implement the `Eq` and `Hash` traits. This can frequently be achieved by +/// using `#[derive(PartialEq, Eq, Hash)]`. If you implement these yourself, +/// it is important that the following property holds: +/// +/// ```text +/// k1 == k2 -> hash(k1) == hash(k2) +/// ``` +/// +/// In other words, if two keys are equal, their hashes must be equal. +/// +/// +/// It is a logic error for an item to be modified in such a way that the +/// item's hash, as determined by the `Hash` trait, or its equality, as +/// determined by the `Eq` trait, changes while it is in the set. This is +/// normally only possible through `Cell`, `RefCell`, global state, I/O, or +/// unsafe code. +/// +/// # Examples +/// +/// ``` +/// use std::collections::HashSet; +/// // Type inference lets us omit an explicit type signature (which +/// // would be `HashSet<&str>` in this example). +/// let mut books = HashSet::new(); +/// +/// // Add some books. +/// books.insert("A Dance With Dragons"); +/// books.insert("To Kill a Mockingbird"); +/// books.insert("The Odyssey"); +/// books.insert("The Great Gatsby"); +/// +/// // Check for a specific one. +/// if !books.contains("The Winds of Winter") { +/// println!("We have {} books, but The Winds of Winter ain't one.", +/// books.len()); +/// } +/// +/// // Remove a book. +/// books.remove("The Odyssey"); +/// +/// // Iterate over everything. +/// for book in &books { +/// println!("{}", book); +/// } +/// ``` +/// +/// The easiest way to use `HashSet` with a custom type is to derive +/// `Eq` and `Hash`. We must also derive `PartialEq`, this will in the +/// future be implied by `Eq`. +/// +/// ``` +/// use std::collections::HashSet; +/// #[derive(Hash, Eq, PartialEq, Debug)] +/// struct Viking<'a> { +/// name: &'a str, +/// power: usize, +/// } +/// +/// let mut vikings = HashSet::new(); +/// +/// vikings.insert(Viking { name: "Einar", power: 9 }); +/// vikings.insert(Viking { name: "Einar", power: 9 }); +/// vikings.insert(Viking { name: "Olaf", power: 4 }); +/// vikings.insert(Viking { name: "Harald", power: 8 }); +/// +/// // Use derived implementation to print the vikings. 
+/// for x in &vikings { +/// println!("{:?}", x); +/// } +/// ``` +/// +/// HashSet with fixed list of elements can be initialized from an array: +/// +/// ``` +/// use std::collections::HashSet; +/// +/// fn main() { +/// let viking_names: HashSet<&str> = +/// [ "Einar", "Olaf", "Harald" ].iter().cloned().collect(); +/// // use the values stored in the set +/// } +/// ``` + + +#[derive(Clone)] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct HashSet { + map: HashMap, +} + +impl HashSet { + /// Creates an empty HashSet. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet = HashSet::new(); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new() -> HashSet { + HashSet { map: HashMap::new() } + } + + /// Creates an empty `HashSet` with the specified capacity. + /// + /// The hash set will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash set will not allocate. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet = HashSet::with_capacity(10); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize) -> HashSet { + HashSet { map: HashMap::with_capacity(capacity) } + } +} + +impl HashSet + where T: Eq + Hash, + S: BuildHasher +{ + /// Creates a new empty hash set which will use the given hasher to hash + /// keys. + /// + /// The hash set is also created with the default initial capacity. + /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// let mut set = HashSet::with_hasher(s); + /// set.insert(2); + /// ``` + #[inline] + #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] + pub fn with_hasher(hasher: S) -> HashSet { + HashSet { map: HashMap::with_hasher(hasher) } + } + + /// Creates an empty HashSet with with the specified capacity, using + /// `hasher` to hash the keys. + /// + /// The hash set will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash set will not allocate. + /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// let mut set = HashSet::with_capacity_and_hasher(10, s); + /// set.insert(1); + /// ``` + #[inline] + #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] + pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> HashSet { + HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) } + } + + /// Returns a reference to the set's hasher. + #[stable(feature = "hashmap_public_hasher", since = "1.9.0")] + pub fn hasher(&self) -> &S { + self.map.hasher() + } + + /// Returns the number of elements the set can hold without reallocating. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let set: HashSet = HashSet::with_capacity(100); + /// assert!(set.capacity() >= 100); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn capacity(&self) -> usize { + self.map.capacity() + } + + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the `HashSet`. The collection may reserve more space to avoid + /// frequent reallocations. + /// + /// # Panics + /// + /// Panics if the new allocation size overflows `usize`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet = HashSet::new(); + /// set.reserve(10); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve(&mut self, additional: usize) { + self.map.reserve(additional) + } + + /// Shrinks the capacity of the set as much as possible. It will drop + /// down as much as possible while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut set = HashSet::with_capacity(100); + /// set.insert(1); + /// set.insert(2); + /// assert!(set.capacity() >= 100); + /// set.shrink_to_fit(); + /// assert!(set.capacity() >= 2); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn shrink_to_fit(&mut self) { + self.map.shrink_to_fit() + } + + /// An iterator visiting all elements in arbitrary order. + /// Iterator element type is &'a T. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set = HashSet::new(); + /// set.insert("a"); + /// set.insert("b"); + /// + /// // Will print in an arbitrary order. + /// for x in set.iter() { + /// println!("{}", x); + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn iter(&self) -> Iter { + Iter { iter: self.map.keys() } + } + + /// Visit the values representing the difference. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let b: HashSet<_> = [4, 2, 3, 4].iter().cloned().collect(); + /// + /// // Can be seen as `a - b`. + /// for x in a.difference(&b) { + /// println!("{}", x); // Print 1 + /// } + /// + /// let diff: HashSet<_> = a.difference(&b).cloned().collect(); + /// assert_eq!(diff, [1].iter().cloned().collect()); + /// + /// // Note that difference is not symmetric, + /// // and `b - a` means something else: + /// let diff: HashSet<_> = b.difference(&a).cloned().collect(); + /// assert_eq!(diff, [4].iter().cloned().collect()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn difference<'a>(&'a self, other: &'a HashSet) -> Difference<'a, T, S> { + Difference { + iter: self.iter(), + other: other, + } + } + + /// Visit the values representing the symmetric difference. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let b: HashSet<_> = [4, 2, 3, 4].iter().cloned().collect(); + /// + /// // Print 1, 4 in arbitrary order. 
+ /// for x in a.symmetric_difference(&b) { + /// println!("{}", x); + /// } + /// + /// let diff1: HashSet<_> = a.symmetric_difference(&b).cloned().collect(); + /// let diff2: HashSet<_> = b.symmetric_difference(&a).cloned().collect(); + /// + /// assert_eq!(diff1, diff2); + /// assert_eq!(diff1, [1, 4].iter().cloned().collect()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn symmetric_difference<'a>(&'a self, + other: &'a HashSet) + -> SymmetricDifference<'a, T, S> { + SymmetricDifference { iter: self.difference(other).chain(other.difference(self)) } + } + + /// Visit the values representing the intersection. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let b: HashSet<_> = [4, 2, 3, 4].iter().cloned().collect(); + /// + /// // Print 2, 3 in arbitrary order. + /// for x in a.intersection(&b) { + /// println!("{}", x); + /// } + /// + /// let intersection: HashSet<_> = a.intersection(&b).cloned().collect(); + /// assert_eq!(intersection, [2, 3].iter().cloned().collect()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn intersection<'a>(&'a self, other: &'a HashSet) -> Intersection<'a, T, S> { + Intersection { + iter: self.iter(), + other: other, + } + } + + /// Visit the values representing the union. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let b: HashSet<_> = [4, 2, 3, 4].iter().cloned().collect(); + /// + /// // Print 1, 2, 3, 4 in arbitrary order. + /// for x in a.union(&b) { + /// println!("{}", x); + /// } + /// + /// let union: HashSet<_> = a.union(&b).cloned().collect(); + /// assert_eq!(union, [1, 2, 3, 4].iter().cloned().collect()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn union<'a>(&'a self, other: &'a HashSet) -> Union<'a, T, S> { + Union { iter: self.iter().chain(other.difference(self)) } + } + + /// Returns the number of elements in the set. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut v = HashSet::new(); + /// assert_eq!(v.len(), 0); + /// v.insert(1); + /// assert_eq!(v.len(), 1); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn len(&self) -> usize { + self.map.len() + } + + /// Returns true if the set contains no elements. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut v = HashSet::new(); + /// assert!(v.is_empty()); + /// v.insert(1); + /// assert!(!v.is_empty()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /// Clears the set, returning all elements in an iterator. + #[inline] + #[stable(feature = "drain", since = "1.6.0")] + pub fn drain(&mut self) -> Drain { + Drain { iter: self.map.drain() } + } + + /// Clears the set, removing all values. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut v = HashSet::new(); + /// v.insert(1); + /// v.clear(); + /// assert!(v.is_empty()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn clear(&mut self) { + self.map.clear() + } + + /// Returns `true` if the set contains a value. + /// + /// The value may be any borrowed form of the set's value type, but + /// `Hash` and `Eq` on the borrowed form *must* match those for + /// the value type. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let set: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// assert_eq!(set.contains(&1), true); + /// assert_eq!(set.contains(&4), false); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn contains(&self, value: &Q) -> bool + where T: Borrow, + Q: Hash + Eq + { + self.map.contains_key(value) + } + + /// Returns a reference to the value in the set, if any, that is equal to the given value. + /// + /// The value may be any borrowed form of the set's value type, but + /// `Hash` and `Eq` on the borrowed form *must* match those for + /// the value type. + #[stable(feature = "set_recovery", since = "1.9.0")] + pub fn get(&self, value: &Q) -> Option<&T> + where T: Borrow, + Q: Hash + Eq + { + Recover::get(&self.map, value) + } + + /// Returns `true` if the set has no elements in common with `other`. + /// This is equivalent to checking for an empty intersection. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let mut b = HashSet::new(); + /// + /// assert_eq!(a.is_disjoint(&b), true); + /// b.insert(4); + /// assert_eq!(a.is_disjoint(&b), true); + /// b.insert(1); + /// assert_eq!(a.is_disjoint(&b), false); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_disjoint(&self, other: &HashSet) -> bool { + self.iter().all(|v| !other.contains(v)) + } + + /// Returns `true` if the set is a subset of another. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let sup: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let mut set = HashSet::new(); + /// + /// assert_eq!(set.is_subset(&sup), true); + /// set.insert(2); + /// assert_eq!(set.is_subset(&sup), true); + /// set.insert(4); + /// assert_eq!(set.is_subset(&sup), false); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_subset(&self, other: &HashSet) -> bool { + self.iter().all(|v| other.contains(v)) + } + + /// Returns `true` if the set is a superset of another. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let sub: HashSet<_> = [1, 2].iter().cloned().collect(); + /// let mut set = HashSet::new(); + /// + /// assert_eq!(set.is_superset(&sub), false); + /// + /// set.insert(0); + /// set.insert(1); + /// assert_eq!(set.is_superset(&sub), false); + /// + /// set.insert(2); + /// assert_eq!(set.is_superset(&sub), true); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_superset(&self, other: &HashSet) -> bool { + other.is_subset(self) + } + + /// Adds a value to the set. + /// + /// If the set did not have this value present, `true` is returned. + /// + /// If the set did have this value present, `false` is returned. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut set = HashSet::new(); + /// + /// assert_eq!(set.insert(2), true); + /// assert_eq!(set.insert(2), false); + /// assert_eq!(set.len(), 1); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(&mut self, value: T) -> bool { + self.map.insert(value, ()).is_none() + } + + /// Adds a value to the set, replacing the existing value, if any, that is equal to the given + /// one. Returns the replaced value. 
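    // NOTE (editorial sketch, not part of the original patch): a hypothetical
    // doc example; using `Vec`'s capacity makes the swap of an equal value
    // observable, since equality of `Vec`s ignores capacity.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::collections::HashSet;
    ///
    /// let mut set = HashSet::new();
    /// set.insert(Vec::<i32>::new());
    ///
    /// assert_eq!(set.get(&[][..]).unwrap().capacity(), 0);
    /// set.replace(Vec::with_capacity(10));
    /// assert_eq!(set.get(&[][..]).unwrap().capacity(), 10);
    /// ```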
+ #[stable(feature = "set_recovery", since = "1.9.0")] + pub fn replace(&mut self, value: T) -> Option { + Recover::replace(&mut self.map, value) + } + + /// Removes a value from the set. Returns `true` if the value was + /// present in the set. + /// + /// The value may be any borrowed form of the set's value type, but + /// `Hash` and `Eq` on the borrowed form *must* match those for + /// the value type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut set = HashSet::new(); + /// + /// set.insert(2); + /// assert_eq!(set.remove(&2), true); + /// assert_eq!(set.remove(&2), false); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn remove(&mut self, value: &Q) -> bool + where T: Borrow, + Q: Hash + Eq + { + self.map.remove(value).is_some() + } + + /// Removes and returns the value in the set, if any, that is equal to the given one. + /// + /// The value may be any borrowed form of the set's value type, but + /// `Hash` and `Eq` on the borrowed form *must* match those for + /// the value type. + #[stable(feature = "set_recovery", since = "1.9.0")] + pub fn take(&mut self, value: &Q) -> Option + where T: Borrow, + Q: Hash + Eq + { + Recover::take(&mut self.map, value) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for HashSet + where T: Eq + Hash, + S: BuildHasher +{ + fn eq(&self, other: &HashSet) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter().all(|key| other.contains(key)) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for HashSet + where T: Eq + Hash, + S: BuildHasher +{ +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for HashSet + where T: Eq + Hash + fmt::Debug, + S: BuildHasher +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_set().entries(self.iter()).finish() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl FromIterator for HashSet + where T: Eq + Hash, + S: BuildHasher + Default +{ + fn from_iter>(iter: I) -> HashSet { + let mut set = HashSet::with_hasher(Default::default()); + set.extend(iter); + set + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Extend for HashSet + where T: Eq + Hash, + S: BuildHasher +{ + fn extend>(&mut self, iter: I) { + self.map.extend(iter.into_iter().map(|k| (k, ()))); + } +} + +#[stable(feature = "hash_extend_copy", since = "1.4.0")] +impl<'a, T, S> Extend<&'a T> for HashSet + where T: 'a + Eq + Hash + Copy, + S: BuildHasher +{ + fn extend>(&mut self, iter: I) { + self.extend(iter.into_iter().cloned()); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for HashSet + where T: Eq + Hash, + S: BuildHasher + Default +{ + /// Creates an empty `HashSet` with the `Default` value for the hasher. + fn default() -> HashSet { + HashSet { map: HashMap::default() } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, 'b, T, S> BitOr<&'b HashSet> for &'a HashSet + where T: Eq + Hash + Clone, + S: BuildHasher + Default +{ + type Output = HashSet; + + /// Returns the union of `self` and `rhs` as a new `HashSet`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a | &b; + /// + /// let mut i = 0; + /// let expected = [1, 2, 3, 4, 5]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitor(self, rhs: &HashSet) -> HashSet { + self.union(rhs).cloned().collect() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, 'b, T, S> BitAnd<&'b HashSet> for &'a HashSet + where T: Eq + Hash + Clone, + S: BuildHasher + Default +{ + type Output = HashSet; + + /// Returns the intersection of `self` and `rhs` as a new `HashSet`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: HashSet<_> = vec![2, 3, 4].into_iter().collect(); + /// + /// let set = &a & &b; + /// + /// let mut i = 0; + /// let expected = [2, 3]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitand(self, rhs: &HashSet) -> HashSet { + self.intersection(rhs).cloned().collect() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, 'b, T, S> BitXor<&'b HashSet> for &'a HashSet + where T: Eq + Hash + Clone, + S: BuildHasher + Default +{ + type Output = HashSet; + + /// Returns the symmetric difference of `self` and `rhs` as a new `HashSet`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a ^ &b; + /// + /// let mut i = 0; + /// let expected = [1, 2, 4, 5]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitxor(self, rhs: &HashSet) -> HashSet { + self.symmetric_difference(rhs).cloned().collect() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, 'b, T, S> Sub<&'b HashSet> for &'a HashSet + where T: Eq + Hash + Clone, + S: BuildHasher + Default +{ + type Output = HashSet; + + /// Returns the difference of `self` and `rhs` as a new `HashSet`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a - &b; + /// + /// let mut i = 0; + /// let expected = [1, 2]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn sub(self, rhs: &HashSet) -> HashSet { + self.difference(rhs).cloned().collect() + } +} + +/// HashSet iterator +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Iter<'a, K: 'a> { + iter: Keys<'a, K, ()>, +} + +/// HashSet move iterator +#[stable(feature = "rust1", since = "1.0.0")] +pub struct IntoIter { + iter: map::IntoIter, +} + +/// HashSet drain iterator +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Drain<'a, K: 'a> { + iter: map::Drain<'a, K, ()>, +} + +/// Intersection iterator +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Intersection<'a, T: 'a, S: 'a> { + // iterator of the first set + iter: Iter<'a, T>, + // the second set + other: &'a HashSet, +} + +/// Difference iterator +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Difference<'a, T: 'a, S: 'a> { + // iterator of the first set + iter: Iter<'a, T>, + // the second set + other: &'a HashSet, +} + +/// Symmetric difference iterator. +#[stable(feature = "rust1", since = "1.0.0")] +pub struct SymmetricDifference<'a, T: 'a, S: 'a> { + iter: Chain, Difference<'a, T, S>>, +} + +/// Set union iterator. +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Union<'a, T: 'a, S: 'a> { + iter: Chain, Difference<'a, T, S>>, +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, S> IntoIterator for &'a HashSet + where T: Eq + Hash, + S: BuildHasher +{ + type Item = &'a T; + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Iter<'a, T> { + self.iter() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl IntoIterator for HashSet + where T: Eq + Hash, + S: BuildHasher +{ + type Item = T; + type IntoIter = IntoIter; + + /// Creates a consuming iterator, that is, one that moves each value out + /// of the set in arbitrary order. The set cannot be used after calling + /// this. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set = HashSet::new(); + /// set.insert("a".to_string()); + /// set.insert("b".to_string()); + /// + /// // Not possible to collect to a Vec with a regular `.iter()`. + /// let v: Vec = set.into_iter().collect(); + /// + /// // Will print in an arbitrary order. 
+ /// for x in &v { + /// println!("{}", x); + /// } + /// ``` + fn into_iter(self) -> IntoIter { + IntoIter { iter: self.map.into_iter() } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K> Clone for Iter<'a, K> { + fn clone(&self) -> Iter<'a, K> { + Iter { iter: self.iter.clone() } + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K> Iterator for Iter<'a, K> { + type Item = &'a K; + + fn next(&mut self) -> Option<&'a K> { + self.iter.next() + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K> ExactSizeIterator for Iter<'a, K> { + fn len(&self) -> usize { + self.iter.len() + } +} +#[unstable(feature = "fused", issue = "35602")] +impl<'a, K> FusedIterator for Iter<'a, K> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for IntoIter { + type Item = K; + + fn next(&mut self) -> Option { + self.iter.next().map(|(k, _)| k) + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.iter.len() + } +} +#[unstable(feature = "fused", issue = "35602")] +impl FusedIterator for IntoIter {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K> Iterator for Drain<'a, K> { + type Item = K; + + fn next(&mut self) -> Option { + self.iter.next().map(|(k, _)| k) + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, K> ExactSizeIterator for Drain<'a, K> { + fn len(&self) -> usize { + self.iter.len() + } +} +#[unstable(feature = "fused", issue = "35602")] +impl<'a, K> FusedIterator for Drain<'a, K> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, S> Clone for Intersection<'a, T, S> { + fn clone(&self) -> Intersection<'a, T, S> { + Intersection { iter: self.iter.clone(), ..*self } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, S> Iterator for Intersection<'a, T, S> + where T: Eq + Hash, + S: BuildHasher +{ + type Item = &'a T; + + fn next(&mut self) -> Option<&'a T> { + loop { + match self.iter.next() { + None => return None, + Some(elt) => { + if self.other.contains(elt) { + return Some(elt); + } + } + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let (_, upper) = self.iter.size_hint(); + (0, upper) + } +} + +#[unstable(feature = "fused", issue = "35602")] +impl<'a, T, S> FusedIterator for Intersection<'a, T, S> + where T: Eq + Hash, + S: BuildHasher +{ +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, S> Clone for Difference<'a, T, S> { + fn clone(&self) -> Difference<'a, T, S> { + Difference { iter: self.iter.clone(), ..*self } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, S> Iterator for Difference<'a, T, S> + where T: Eq + Hash, + S: BuildHasher +{ + type Item = &'a T; + + fn next(&mut self) -> Option<&'a T> { + loop { + match self.iter.next() { + None => return None, + Some(elt) => { + if !self.other.contains(elt) { + return Some(elt); + } + } + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let (_, upper) = self.iter.size_hint(); + (0, upper) + } +} + +#[unstable(feature = "fused", issue = "35602")] +impl<'a, T, S> FusedIterator for Difference<'a, T, S> + where T: Eq + Hash, + S: BuildHasher +{ +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, S> Clone for SymmetricDifference<'a, T, S> { + fn clone(&self) -> SymmetricDifference<'a, T, S> 
{ + SymmetricDifference { iter: self.iter.clone() } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, S> Iterator for SymmetricDifference<'a, T, S> + where T: Eq + Hash, + S: BuildHasher +{ + type Item = &'a T; + + fn next(&mut self) -> Option<&'a T> { + self.iter.next() + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +#[unstable(feature = "fused", issue = "35602")] +impl<'a, T, S> FusedIterator for SymmetricDifference<'a, T, S> + where T: Eq + Hash, + S: BuildHasher +{ +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, S> Clone for Union<'a, T, S> { + fn clone(&self) -> Union<'a, T, S> { + Union { iter: self.iter.clone() } + } +} + +#[unstable(feature = "fused", issue = "35602")] +impl<'a, T, S> FusedIterator for Union<'a, T, S> + where T: Eq + Hash, + S: BuildHasher +{ +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, S> Iterator for Union<'a, T, S> + where T: Eq + Hash, + S: BuildHasher +{ + type Item = &'a T; + + fn next(&mut self) -> Option<&'a T> { + self.iter.next() + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +#[allow(dead_code)] +fn assert_covariance() { + fn set<'new>(v: HashSet<&'static str>) -> HashSet<&'new str> { + v + } + fn iter<'a, 'new>(v: Iter<'a, &'static str>) -> Iter<'a, &'new str> { + v + } + fn into_iter<'new>(v: IntoIter<&'static str>) -> IntoIter<&'new str> { + v + } + fn difference<'a, 'new>(v: Difference<'a, &'static str, RandomState>) + -> Difference<'a, &'new str, RandomState> { + v + } + fn symmetric_difference<'a, 'new>(v: SymmetricDifference<'a, &'static str, RandomState>) + -> SymmetricDifference<'a, &'new str, RandomState> { + v + } + fn intersection<'a, 'new>(v: Intersection<'a, &'static str, RandomState>) + -> Intersection<'a, &'new str, RandomState> { + v + } + fn union<'a, 'new>(v: Union<'a, &'static str, RandomState>) + -> Union<'a, &'new str, RandomState> { + v + } + fn drain<'new>(d: Drain<'static, &'static str>) -> Drain<'new, &'new str> { + d + } +} + +#[cfg(test)] +mod test_set { + use super::HashSet; + use super::super::map::RandomState; + + #[test] + fn test_zero_capacities() { + type HS = HashSet; + + let s = HS::new(); + assert_eq!(s.capacity(), 0); + + let s = HS::default(); + assert_eq!(s.capacity(), 0); + + let s = HS::with_hasher(RandomState::new()); + assert_eq!(s.capacity(), 0); + + let s = HS::with_capacity(0); + assert_eq!(s.capacity(), 0); + + let s = HS::with_capacity_and_hasher(0, RandomState::new()); + assert_eq!(s.capacity(), 0); + + let mut s = HS::new(); + s.insert(1); + s.insert(2); + s.remove(&1); + s.remove(&2); + s.shrink_to_fit(); + assert_eq!(s.capacity(), 0); + + let mut s = HS::new(); + s.reserve(0); + assert_eq!(s.capacity(), 0); + } + + #[test] + fn test_disjoint() { + let mut xs = HashSet::new(); + let mut ys = HashSet::new(); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(xs.insert(5)); + assert!(ys.insert(11)); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(xs.insert(7)); + assert!(xs.insert(19)); + assert!(xs.insert(4)); + assert!(ys.insert(2)); + assert!(ys.insert(-11)); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(ys.insert(7)); + assert!(!xs.is_disjoint(&ys)); + assert!(!ys.is_disjoint(&xs)); + } + + #[test] + fn test_subset_and_superset() { + let mut a = HashSet::new(); + assert!(a.insert(0)); + assert!(a.insert(5)); + assert!(a.insert(11)); + assert!(a.insert(7)); + + let mut b = HashSet::new(); + 
assert!(b.insert(0)); + assert!(b.insert(7)); + assert!(b.insert(19)); + assert!(b.insert(250)); + assert!(b.insert(11)); + assert!(b.insert(200)); + + assert!(!a.is_subset(&b)); + assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(!b.is_superset(&a)); + + assert!(b.insert(5)); + + assert!(a.is_subset(&b)); + assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(b.is_superset(&a)); + } + + #[test] + fn test_iterate() { + let mut a = HashSet::new(); + for i in 0..32 { + assert!(a.insert(i)); + } + let mut observed: u32 = 0; + for k in &a { + observed |= 1 << *k; + } + assert_eq!(observed, 0xFFFF_FFFF); + } + + #[test] + fn test_intersection() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(11)); + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(77)); + assert!(a.insert(103)); + assert!(a.insert(5)); + assert!(a.insert(-5)); + + assert!(b.insert(2)); + assert!(b.insert(11)); + assert!(b.insert(77)); + assert!(b.insert(-9)); + assert!(b.insert(-42)); + assert!(b.insert(5)); + assert!(b.insert(3)); + + let mut i = 0; + let expected = [3, 5, 11, 77]; + for x in a.intersection(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_difference() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + + assert!(b.insert(3)); + assert!(b.insert(9)); + + let mut i = 0; + let expected = [1, 5, 11]; + for x in a.difference(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_symmetric_difference() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + + assert!(b.insert(-2)); + assert!(b.insert(3)); + assert!(b.insert(9)); + assert!(b.insert(14)); + assert!(b.insert(22)); + + let mut i = 0; + let expected = [-2, 1, 5, 11, 14, 22]; + for x in a.symmetric_difference(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_union() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + assert!(a.insert(16)); + assert!(a.insert(19)); + assert!(a.insert(24)); + + assert!(b.insert(-2)); + assert!(b.insert(1)); + assert!(b.insert(5)); + assert!(b.insert(9)); + assert!(b.insert(13)); + assert!(b.insert(19)); + + let mut i = 0; + let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24]; + for x in a.union(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_from_iter() { + let xs = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + + let set: HashSet<_> = xs.iter().cloned().collect(); + + for x in &xs { + assert!(set.contains(x)); + } + } + + #[test] + fn test_move_iter() { + let hs = { + let mut hs = HashSet::new(); + + hs.insert('a'); + hs.insert('b'); + + hs + }; + + let v = hs.into_iter().collect::>(); + assert!(v == ['a', 'b'] || v == ['b', 'a']); + } + + #[test] + fn test_eq() { + // These constants once happened to expose a bug in insert(). + // I'm keeping them around to prevent a regression. 
+ let mut s1 = HashSet::new(); + + s1.insert(1); + s1.insert(2); + s1.insert(3); + + let mut s2 = HashSet::new(); + + s2.insert(1); + s2.insert(2); + + assert!(s1 != s2); + + s2.insert(3); + + assert_eq!(s1, s2); + } + + #[test] + fn test_show() { + let mut set = HashSet::new(); + let empty = HashSet::::new(); + + set.insert(1); + set.insert(2); + + let set_str = format!("{:?}", set); + + assert!(set_str == "{1, 2}" || set_str == "{2, 1}"); + assert_eq!(format!("{:?}", empty), "{}"); + } + + #[test] + fn test_trivial_drain() { + let mut s = HashSet::::new(); + for _ in s.drain() {} + assert!(s.is_empty()); + drop(s); + + let mut s = HashSet::::new(); + drop(s.drain()); + assert!(s.is_empty()); + } + + #[test] + fn test_drain() { + let mut s: HashSet<_> = (1..100).collect(); + + // try this a bunch of times to make sure we don't screw up internal state. + for _ in 0..20 { + assert_eq!(s.len(), 99); + + { + let mut last_i = 0; + let mut d = s.drain(); + for (i, x) in d.by_ref().take(50).enumerate() { + last_i = i; + assert!(x != 0); + } + assert_eq!(last_i, 49); + } + + for _ in &s { + panic!("s should be empty!"); + } + + // reset to try again. + s.extend(1..100); + } + } + + #[test] + fn test_replace() { + use hash; + + #[derive(Debug)] + struct Foo(&'static str, i32); + + impl PartialEq for Foo { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + } + + impl Eq for Foo {} + + impl hash::Hash for Foo { + fn hash(&self, h: &mut H) { + self.0.hash(h); + } + } + + let mut s = HashSet::new(); + assert_eq!(s.replace(Foo("a", 1)), None); + assert_eq!(s.len(), 1); + assert_eq!(s.replace(Foo("a", 2)), Some(Foo("a", 1))); + assert_eq!(s.len(), 1); + + let mut it = s.iter(); + assert_eq!(it.next(), Some(&Foo("a", 2))); + assert_eq!(it.next(), None); + } + + #[test] + fn test_extend_ref() { + let mut a = HashSet::new(); + a.insert(1); + + a.extend(&[2, 3, 4]); + + assert_eq!(a.len(), 4); + assert!(a.contains(&1)); + assert!(a.contains(&2)); + assert!(a.contains(&3)); + assert!(a.contains(&4)); + + let mut b = HashSet::new(); + b.insert(5); + b.insert(6); + + a.extend(&b); + + assert_eq!(a.len(), 6); + assert!(a.contains(&1)); + assert!(a.contains(&2)); + assert!(a.contains(&3)); + assert!(a.contains(&4)); + assert!(a.contains(&5)); + assert!(a.contains(&6)); + } +} diff --git a/ctr-std/src/collections/hash/table.rs b/ctr-std/src/collections/hash/table.rs new file mode 100644 index 0000000..4fd4abf --- /dev/null +++ b/ctr-std/src/collections/hash/table.rs @@ -0,0 +1,1071 @@ +// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![allow(deprecated)] + +use alloc::heap::{EMPTY, allocate, deallocate}; + +use cmp; +use hash::{BuildHasher, Hash, Hasher}; +use intrinsics::needs_drop; +use marker; +use mem::{align_of, size_of}; +use mem; +use ops::{Deref, DerefMut}; +use ptr::{self, Unique, Shared}; + +use self::BucketState::*; + +/// Integer type used for stored hash values. +/// +/// No more than bit_width(usize) bits are needed to select a bucket. +/// +/// The most significant bit is ours to use for tagging `SafeHash`. 
+/// +/// (Even if we could have usize::MAX bytes allocated for buckets, +/// each bucket stores at least a `HashUint`, so there can be no more than +/// usize::MAX / size_of(usize) buckets.) +type HashUint = usize; + +const EMPTY_BUCKET: HashUint = 0; + +/// The raw hashtable, providing safe-ish access to the unzipped and highly +/// optimized arrays of hashes, and key-value pairs. +/// +/// This design is a lot faster than the naive +/// `Vec>`, because we don't pay for the overhead of an +/// option on every element, and we get a generally more cache-aware design. +/// +/// Essential invariants of this structure: +/// +/// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw` +/// points to 'undefined' contents. Don't read from it. This invariant is +/// enforced outside this module with the `EmptyBucket`, `FullBucket`, +/// and `SafeHash` types. +/// +/// - An `EmptyBucket` is only constructed at an index with +/// a hash of EMPTY_BUCKET. +/// +/// - A `FullBucket` is only constructed at an index with a +/// non-EMPTY_BUCKET hash. +/// +/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get +/// around hashes of zero by changing them to 0x8000_0000_0000_0000, +/// which will likely map to the same bucket, while not being confused +/// with "empty". +/// +/// - Both "arrays represented by pointers" are the same length: +/// `capacity`. This is set at creation and never changes. The arrays +/// are unzipped and are more cache aware (scanning through 8 hashes +/// brings in at most 2 cache lines, since they're all right beside each +/// other). This layout may waste space in padding such as in a map from +/// u64 to u8, but is a more cache conscious layout as the key-value pairs +/// are only very shortly probed and the desired value will be in the same +/// or next cache line. +/// +/// You can kind of think of this module/data structure as a safe wrapper +/// around just the "table" part of the hashtable. It enforces some +/// invariants at the type level and employs some performance trickery, +/// but in general is just a tricked out `Vec>`. +pub struct RawTable { + capacity: usize, + size: usize, + hashes: Unique, + + // Because K/V do not appear directly in any of the types in the struct, + // inform rustc that in fact instances of K and V are reachable from here. + marker: marker::PhantomData<(K, V)>, +} + +unsafe impl Send for RawTable {} +unsafe impl Sync for RawTable {} + +struct RawBucket { + hash: *mut HashUint, + // We use *const to ensure covariance with respect to K and V + pair: *const (K, V), + _marker: marker::PhantomData<(K, V)>, +} + +impl Copy for RawBucket {} +impl Clone for RawBucket { + fn clone(&self) -> RawBucket { + *self + } +} + +pub struct Bucket { + raw: RawBucket, + idx: usize, + table: M, +} + +impl Copy for Bucket {} +impl Clone for Bucket { + fn clone(&self) -> Bucket { + *self + } +} + +pub struct EmptyBucket { + raw: RawBucket, + idx: usize, + table: M, +} + +pub struct FullBucket { + raw: RawBucket, + idx: usize, + table: M, +} + +pub type FullBucketMut<'table, K, V> = FullBucket>; + +pub enum BucketState { + Empty(EmptyBucket), + Full(FullBucket), +} + +// A GapThenFull encapsulates the state of two consecutive buckets at once. +// The first bucket, called the gap, is known to be empty. +// The second bucket is full. +pub struct GapThenFull { + gap: EmptyBucket, + full: FullBucket, +} + +/// A hash that is not zero, since we use a hash of zero to represent empty +/// buckets. 
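+///
+/// (Note added for clarity: `SafeHash::new` below unconditionally sets the
+/// most significant bit of the raw hash, so a stored hash can never equal
+/// `EMPTY_BUCKET`, which is 0.)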
+#[derive(PartialEq, Copy, Clone)] +pub struct SafeHash { + hash: HashUint, +} + +impl SafeHash { + /// Peek at the hash value, which is guaranteed to be non-zero. + #[inline(always)] + pub fn inspect(&self) -> HashUint { + self.hash + } + + #[inline(always)] + pub fn new(hash: u64) -> Self { + // We need to avoid 0 in order to prevent collisions with + // EMPTY_HASH. We can maintain our precious uniform distribution + // of initial indexes by unconditionally setting the MSB, + // effectively reducing the hashes by one bit. + // + // Truncate hash to fit in `HashUint`. + let hash_bits = size_of::() * 8; + SafeHash { hash: (1 << (hash_bits - 1)) | (hash as HashUint) } + } +} + +/// We need to remove hashes of 0. That's reserved for empty buckets. +/// This function wraps up `hash_keyed` to be the only way outside this +/// module to generate a SafeHash. +pub fn make_hash(hash_state: &S, t: &T) -> SafeHash + where T: Hash, + S: BuildHasher +{ + let mut state = hash_state.build_hasher(); + t.hash(&mut state); + SafeHash::new(state.finish()) +} + +// `replace` casts a `*HashUint` to a `*SafeHash`. Since we statically +// ensure that a `FullBucket` points to an index with a non-zero hash, +// and a `SafeHash` is just a `HashUint` with a different name, this is +// safe. +// +// This test ensures that a `SafeHash` really IS the same size as a +// `HashUint`. If you need to change the size of `SafeHash` (and +// consequently made this test fail), `replace` needs to be +// modified to no longer assume this. +#[test] +fn can_alias_safehash_as_hash() { + assert_eq!(size_of::(), size_of::()) +} + +impl RawBucket { + unsafe fn offset(self, count: isize) -> RawBucket { + RawBucket { + hash: self.hash.offset(count), + pair: self.pair.offset(count), + _marker: marker::PhantomData, + } + } +} + +// Buckets hold references to the table. +impl FullBucket { + /// Borrow a reference to the table. + pub fn table(&self) -> &M { + &self.table + } + /// Move out the reference to the table. + pub fn into_table(self) -> M { + self.table + } + /// Get the raw index. + pub fn index(&self) -> usize { + self.idx + } +} + +impl EmptyBucket { + /// Borrow a reference to the table. + pub fn table(&self) -> &M { + &self.table + } +} + +impl Bucket { + /// Get the raw index. + pub fn index(&self) -> usize { + self.idx + } +} + +impl Deref for FullBucket + where M: Deref> +{ + type Target = RawTable; + fn deref(&self) -> &RawTable { + &self.table + } +} + +/// `Put` is implemented for types which provide access to a table and cannot be invalidated +/// by filling a bucket. A similar implementation for `Take` is possible. +pub trait Put { + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable; +} + + +impl<'t, K, V> Put for &'t mut RawTable { + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { + *self + } +} + +impl Put for Bucket + where M: Put +{ + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { + self.table.borrow_table_mut() + } +} + +impl Put for FullBucket + where M: Put +{ + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { + self.table.borrow_table_mut() + } +} + +impl>> Bucket { + pub fn new(table: M, hash: SafeHash) -> Bucket { + Bucket::at_index(table, hash.inspect() as usize) + } + + pub fn at_index(table: M, ib_index: usize) -> Bucket { + // if capacity is 0, then the RawBucket will be populated with bogus pointers. + // This is an uncommon case though, so avoid it in release builds. 
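+        //
+        // (Note added for clarity: the `ib_index & (capacity - 1)` mask below
+        // also relies on the capacity being a power of two, which the resize
+        // policy's `raw_capacity` guarantees.)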
+ debug_assert!(table.capacity() > 0, + "Table should have capacity at this point"); + let ib_index = ib_index & (table.capacity() - 1); + Bucket { + raw: unsafe { table.first_bucket_raw().offset(ib_index as isize) }, + idx: ib_index, + table: table, + } + } + + pub fn first(table: M) -> Bucket { + Bucket { + raw: table.first_bucket_raw(), + idx: 0, + table: table, + } + } + + /// Reads a bucket at a given index, returning an enum indicating whether + /// it's initialized or not. You need to match on this enum to get + /// the appropriate types to call most of the other functions in + /// this module. + pub fn peek(self) -> BucketState { + match unsafe { *self.raw.hash } { + EMPTY_BUCKET => { + Empty(EmptyBucket { + raw: self.raw, + idx: self.idx, + table: self.table, + }) + } + _ => { + Full(FullBucket { + raw: self.raw, + idx: self.idx, + table: self.table, + }) + } + } + } + + /// Modifies the bucket pointer in place to make it point to the next slot. + pub fn next(&mut self) { + self.idx += 1; + let range = self.table.capacity(); + // This code is branchless thanks to a conditional move. + let dist = if self.idx & (range - 1) == 0 { + 1 - range as isize + } else { + 1 + }; + unsafe { + self.raw = self.raw.offset(dist); + } + } +} + +impl>> EmptyBucket { + #[inline] + pub fn next(self) -> Bucket { + let mut bucket = self.into_bucket(); + bucket.next(); + bucket + } + + #[inline] + pub fn into_bucket(self) -> Bucket { + Bucket { + raw: self.raw, + idx: self.idx, + table: self.table, + } + } + + pub fn gap_peek(self) -> Option> { + let gap = EmptyBucket { + raw: self.raw, + idx: self.idx, + table: (), + }; + + match self.next().peek() { + Full(bucket) => { + Some(GapThenFull { + gap: gap, + full: bucket, + }) + } + Empty(..) => None, + } + } +} + +impl EmptyBucket + where M: Put +{ + /// Puts given key and value pair, along with the key's hash, + /// into this bucket in the hashtable. Note how `self` is 'moved' into + /// this function, because this slot will no longer be empty when + /// we return! A `FullBucket` is returned for later use, pointing to + /// the newly-filled slot in the hashtable. + /// + /// Use `make_hash` to construct a `SafeHash` to pass to this function. + pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket { + unsafe { + *self.raw.hash = hash.inspect(); + ptr::write(self.raw.pair as *mut (K, V), (key, value)); + + self.table.borrow_table_mut().size += 1; + } + + FullBucket { + raw: self.raw, + idx: self.idx, + table: self.table, + } + } +} + +impl>> FullBucket { + #[inline] + pub fn next(self) -> Bucket { + let mut bucket = self.into_bucket(); + bucket.next(); + bucket + } + + #[inline] + pub fn into_bucket(self) -> Bucket { + Bucket { + raw: self.raw, + idx: self.idx, + table: self.table, + } + } + + /// Duplicates the current position. This can be useful for operations + /// on two or more buckets. + pub fn stash(self) -> FullBucket { + FullBucket { + raw: self.raw, + idx: self.idx, + table: self, + } + } + + /// Get the distance between this bucket and the 'ideal' location + /// as determined by the key's hash stored in it. + /// + /// In the cited blog posts above, this is called the "distance to + /// initial bucket", or DIB. Also known as "probe count". + pub fn displacement(&self) -> usize { + // Calculates the distance one has to travel when going from + // `hash mod capacity` onwards to `idx mod capacity`, wrapping around + // if the destination is not reached before the end of the table. 
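+        //
+        // Worked example (added for illustration): with capacity 8, an ideal
+        // index of 6 and an actual `idx` of 1, `1.wrapping_sub(6) & 7` is 3,
+        // i.e. the entry sits three probes past its ideal bucket, wrapping
+        // around the end of the table.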
+ (self.idx.wrapping_sub(self.hash().inspect() as usize)) & (self.table.capacity() - 1) + } + + #[inline] + pub fn hash(&self) -> SafeHash { + unsafe { SafeHash { hash: *self.raw.hash } } + } + + /// Gets references to the key and value at a given index. + pub fn read(&self) -> (&K, &V) { + unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) } + } +} + +// We take a mutable reference to the table instead of accepting anything that +// implements `DerefMut` to prevent fn `take` from being called on `stash`ed +// buckets. +impl<'t, K, V> FullBucket> { + /// Removes this bucket's key and value from the hashtable. + /// + /// This works similarly to `put`, building an `EmptyBucket` out of the + /// taken bucket. + pub fn take(mut self) -> (EmptyBucket>, K, V) { + self.table.size -= 1; + + unsafe { + *self.raw.hash = EMPTY_BUCKET; + let (k, v) = ptr::read(self.raw.pair); + (EmptyBucket { + raw: self.raw, + idx: self.idx, + table: self.table, + }, + k, + v) + } + } +} + +// This use of `Put` is misleading and restrictive, but safe and sufficient for our use cases +// where `M` is a full bucket or table reference type with mutable access to the table. +impl FullBucket + where M: Put +{ + pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { + unsafe { + let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); + let (old_key, old_val) = ptr::replace(self.raw.pair as *mut (K, V), (k, v)); + + (old_hash, old_key, old_val) + } + } +} + +impl FullBucket + where M: Deref> + DerefMut +{ + /// Gets mutable references to the key and value at a given index. + pub fn read_mut(&mut self) -> (&mut K, &mut V) { + let pair_mut = self.raw.pair as *mut (K, V); + unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) } + } +} + +impl<'t, K, V, M> FullBucket + where M: Deref> + 't +{ + /// Exchange a bucket state for immutable references into the table. + /// Because the underlying reference to the table is also consumed, + /// no further changes to the structure of the table are possible; + /// in exchange for this, the returned references have a longer lifetime + /// than the references returned by `read()`. + pub fn into_refs(self) -> (&'t K, &'t V) { + unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) } + } +} + +impl<'t, K, V, M> FullBucket + where M: Deref> + DerefMut + 't +{ + /// This works similarly to `into_refs`, exchanging a bucket state + /// for mutable references into the table. + pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { + let pair_mut = self.raw.pair as *mut (K, V); + unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) } + } +} + +impl GapThenFull + where M: Deref> +{ + #[inline] + pub fn full(&self) -> &FullBucket { + &self.full + } + + pub fn shift(mut self) -> Option> { + unsafe { + *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); + ptr::copy_nonoverlapping(self.full.raw.pair, self.gap.raw.pair as *mut (K, V), 1); + } + + let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full; + + match self.full.next().peek() { + Full(bucket) => { + self.gap.raw = prev_raw; + self.gap.idx = prev_idx; + + self.full = bucket; + + Some(self) + } + Empty(..) => None, + } + } +} + + +/// Rounds up to a multiple of a power of two. Returns the closest multiple +/// of `target_alignment` that is higher or equal to `unrounded`. +/// +/// # Panics +/// +/// Panics if `target_alignment` is not a power of two. 
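+// For example (illustrative): `round_up_to_next(5, 4)` is 8 and
+// `round_up_to_next(8, 8)` is 8; see `test_rounding` below.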
+#[inline] +fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize { + assert!(target_alignment.is_power_of_two()); + (unrounded + target_alignment - 1) & !(target_alignment - 1) +} + +#[test] +fn test_rounding() { + assert_eq!(round_up_to_next(0, 4), 0); + assert_eq!(round_up_to_next(1, 4), 4); + assert_eq!(round_up_to_next(2, 4), 4); + assert_eq!(round_up_to_next(3, 4), 4); + assert_eq!(round_up_to_next(4, 4), 4); + assert_eq!(round_up_to_next(5, 4), 8); +} + +// Returns a tuple of (pairs_offset, end_of_pairs_offset), +// from the start of a mallocated array. +#[inline] +fn calculate_offsets(hashes_size: usize, + pairs_size: usize, + pairs_align: usize) + -> (usize, usize, bool) { + let pairs_offset = round_up_to_next(hashes_size, pairs_align); + let (end_of_pairs, oflo) = pairs_offset.overflowing_add(pairs_size); + + (pairs_offset, end_of_pairs, oflo) +} + +// Returns a tuple of (minimum required malloc alignment, hash_offset, +// array_size), from the start of a mallocated array. +fn calculate_allocation(hash_size: usize, + hash_align: usize, + pairs_size: usize, + pairs_align: usize) + -> (usize, usize, usize, bool) { + let hash_offset = 0; + let (_, end_of_pairs, oflo) = calculate_offsets(hash_size, pairs_size, pairs_align); + + let align = cmp::max(hash_align, pairs_align); + + (align, hash_offset, end_of_pairs, oflo) +} + +#[test] +fn test_offset_calculation() { + assert_eq!(calculate_allocation(128, 8, 16, 8), (8, 0, 144, false)); + assert_eq!(calculate_allocation(3, 1, 2, 1), (1, 0, 5, false)); + assert_eq!(calculate_allocation(6, 2, 12, 4), (4, 0, 20, false)); + assert_eq!(calculate_offsets(128, 15, 4), (128, 143, false)); + assert_eq!(calculate_offsets(3, 2, 4), (4, 6, false)); + assert_eq!(calculate_offsets(6, 12, 4), (8, 20, false)); +} + +impl RawTable { + /// Does not initialize the buckets. The caller should ensure they, + /// at the very least, set every hash to EMPTY_BUCKET. + unsafe fn new_uninitialized(capacity: usize) -> RawTable { + if capacity == 0 { + return RawTable { + size: 0, + capacity: 0, + hashes: Unique::new(EMPTY as *mut HashUint), + marker: marker::PhantomData, + }; + } + + // No need for `checked_mul` before a more restrictive check performed + // later in this method. + let hashes_size = capacity.wrapping_mul(size_of::()); + let pairs_size = capacity.wrapping_mul(size_of::<(K, V)>()); + + // Allocating hashmaps is a little tricky. We need to allocate two + // arrays, but since we know their sizes and alignments up front, + // we just allocate a single array, and then have the subarrays + // point into it. + // + // This is great in theory, but in practice getting the alignment + // right is a little subtle. Therefore, calculating offsets has been + // factored out into a different function. + let (alignment, hash_offset, size, oflo) = calculate_allocation(hashes_size, + align_of::(), + pairs_size, + align_of::<(K, V)>()); + assert!(!oflo, "capacity overflow"); + + // One check for overflow that covers calculation and rounding of size. 
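+        //
+        // (Note added for clarity: `size` is the padded allocation size, so
+        // the assert below can only pass if neither the wrapping
+        // multiplications above nor the offset rounding overflowed.)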
+ let size_of_bucket = size_of::().checked_add(size_of::<(K, V)>()).unwrap(); + assert!(size >= + capacity.checked_mul(size_of_bucket) + .expect("capacity overflow"), + "capacity overflow"); + + let buffer = allocate(size, alignment); + if buffer.is_null() { + ::alloc::oom() + } + + let hashes = buffer.offset(hash_offset as isize) as *mut HashUint; + + RawTable { + capacity: capacity, + size: 0, + hashes: Unique::new(hashes), + marker: marker::PhantomData, + } + } + + fn first_bucket_raw(&self) -> RawBucket { + let hashes_size = self.capacity * size_of::(); + let pairs_size = self.capacity * size_of::<(K, V)>(); + + let buffer = *self.hashes as *mut u8; + let (pairs_offset, _, oflo) = + calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>()); + debug_assert!(!oflo, "capacity overflow"); + unsafe { + RawBucket { + hash: *self.hashes, + pair: buffer.offset(pairs_offset as isize) as *const _, + _marker: marker::PhantomData, + } + } + } + + /// Creates a new raw table from a given capacity. All buckets are + /// initially empty. + pub fn new(capacity: usize) -> RawTable { + unsafe { + let ret = RawTable::new_uninitialized(capacity); + ptr::write_bytes(*ret.hashes, 0, capacity); + ret + } + } + + /// The hashtable's capacity, similar to a vector's. + pub fn capacity(&self) -> usize { + self.capacity + } + + /// The number of elements ever `put` in the hashtable, minus the number + /// of elements ever `take`n. + pub fn size(&self) -> usize { + self.size + } + + fn raw_buckets(&self) -> RawBuckets { + RawBuckets { + raw: self.first_bucket_raw(), + hashes_end: unsafe { self.hashes.offset(self.capacity as isize) }, + marker: marker::PhantomData, + } + } + + pub fn iter(&self) -> Iter { + Iter { + iter: self.raw_buckets(), + elems_left: self.size(), + } + } + + pub fn iter_mut(&mut self) -> IterMut { + IterMut { + iter: self.raw_buckets(), + elems_left: self.size(), + _marker: marker::PhantomData, + } + } + + pub fn into_iter(self) -> IntoIter { + let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); + // Replace the marker regardless of lifetime bounds on parameters. + IntoIter { + iter: RawBuckets { + raw: raw, + hashes_end: hashes_end, + marker: marker::PhantomData, + }, + table: self, + } + } + + pub fn drain(&mut self) -> Drain { + let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); + // Replace the marker regardless of lifetime bounds on parameters. + Drain { + iter: RawBuckets { + raw: raw, + hashes_end: hashes_end, + marker: marker::PhantomData, + }, + table: unsafe { Shared::new(self) }, + marker: marker::PhantomData, + } + } + + /// Returns an iterator that copies out each entry. Used while the table + /// is being dropped. + unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets { + let raw_bucket = self.first_bucket_raw(); + RevMoveBuckets { + raw: raw_bucket.offset(self.capacity as isize), + hashes_end: raw_bucket.hash, + elems_left: self.size, + marker: marker::PhantomData, + } + } +} + +/// A raw iterator. The basis for some other iterators in this module. Although +/// this interface is safe, it's not used outside this module. +struct RawBuckets<'a, K, V> { + raw: RawBucket, + hashes_end: *mut HashUint, + + // Strictly speaking, this should be &'a (K,V), but that would + // require that K:'a, and we often use RawBuckets<'static...> for + // move iterations, so that messes up a lot of other things. So + // just use `&'a (K,V)` as this is not a publicly exposed type + // anyway. 
+ marker: marker::PhantomData<&'a ()>, +} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +impl<'a, K, V> Clone for RawBuckets<'a, K, V> { + fn clone(&self) -> RawBuckets<'a, K, V> { + RawBuckets { + raw: self.raw, + hashes_end: self.hashes_end, + marker: marker::PhantomData, + } + } +} + + +impl<'a, K, V> Iterator for RawBuckets<'a, K, V> { + type Item = RawBucket; + + fn next(&mut self) -> Option> { + while self.raw.hash != self.hashes_end { + unsafe { + // We are swapping out the pointer to a bucket and replacing + // it with the pointer to the next one. + let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); + if *prev.hash != EMPTY_BUCKET { + return Some(prev); + } + } + } + + None + } +} + +/// An iterator that moves out buckets in reverse order. It leaves the table +/// in an inconsistent state and should only be used for dropping +/// the table's remaining entries. It's used in the implementation of Drop. +struct RevMoveBuckets<'a, K, V> { + raw: RawBucket, + hashes_end: *mut HashUint, + elems_left: usize, + + // As above, `&'a (K,V)` would seem better, but we often use + // 'static for the lifetime, and this is not a publicly exposed + // type. + marker: marker::PhantomData<&'a ()>, +} + +impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> { + type Item = (K, V); + + fn next(&mut self) -> Option<(K, V)> { + if self.elems_left == 0 { + return None; + } + + loop { + debug_assert!(self.raw.hash != self.hashes_end); + + unsafe { + self.raw = self.raw.offset(-1); + + if *self.raw.hash != EMPTY_BUCKET { + self.elems_left -= 1; + return Some(ptr::read(self.raw.pair)); + } + } + } + } +} + +/// Iterator over shared references to entries in a table. +pub struct Iter<'a, K: 'a, V: 'a> { + iter: RawBuckets<'a, K, V>, + elems_left: usize, +} + +unsafe impl<'a, K: Sync, V: Sync> Sync for Iter<'a, K, V> {} +unsafe impl<'a, K: Sync, V: Sync> Send for Iter<'a, K, V> {} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +impl<'a, K, V> Clone for Iter<'a, K, V> { + fn clone(&self) -> Iter<'a, K, V> { + Iter { + iter: self.iter.clone(), + elems_left: self.elems_left, + } + } +} + + +/// Iterator over mutable references to entries in a table. +pub struct IterMut<'a, K: 'a, V: 'a> { + iter: RawBuckets<'a, K, V>, + elems_left: usize, + // To ensure invariance with respect to V + _marker: marker::PhantomData<&'a mut V>, +} + +unsafe impl<'a, K: Sync, V: Sync> Sync for IterMut<'a, K, V> {} +// Both K: Sync and K: Send are correct for IterMut's Send impl, +// but Send is the more useful bound +unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {} + +/// Iterator over the entries in a table, consuming the table. +pub struct IntoIter { + table: RawTable, + iter: RawBuckets<'static, K, V>, +} + +unsafe impl Sync for IntoIter {} +unsafe impl Send for IntoIter {} + +/// Iterator over the entries in a table, clearing the table. 
+pub struct Drain<'a, K: 'a, V: 'a> { + table: Shared>, + iter: RawBuckets<'static, K, V>, + marker: marker::PhantomData<&'a RawTable>, +} + +unsafe impl<'a, K: Sync, V: Sync> Sync for Drain<'a, K, V> {} +unsafe impl<'a, K: Send, V: Send> Send for Drain<'a, K, V> {} + +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.iter.next().map(|bucket| { + self.elems_left -= 1; + unsafe { (&(*bucket.pair).0, &(*bucket.pair).1) } + }) + } + + fn size_hint(&self) -> (usize, Option) { + (self.elems_left, Some(self.elems_left)) + } +} +impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { + fn len(&self) -> usize { + self.elems_left + } +} + +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.iter.next().map(|bucket| { + self.elems_left -= 1; + let pair_mut = bucket.pair as *mut (K, V); + unsafe { (&(*pair_mut).0, &mut (*pair_mut).1) } + }) + } + + fn size_hint(&self) -> (usize, Option) { + (self.elems_left, Some(self.elems_left)) + } +} +impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { + fn len(&self) -> usize { + self.elems_left + } +} + +impl Iterator for IntoIter { + type Item = (SafeHash, K, V); + + fn next(&mut self) -> Option<(SafeHash, K, V)> { + self.iter.next().map(|bucket| { + self.table.size -= 1; + unsafe { + let (k, v) = ptr::read(bucket.pair); + (SafeHash { hash: *bucket.hash }, k, v) + } + }) + } + + fn size_hint(&self) -> (usize, Option) { + let size = self.table.size(); + (size, Some(size)) + } +} +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.table.size() + } +} + +impl<'a, K, V> Iterator for Drain<'a, K, V> { + type Item = (SafeHash, K, V); + + #[inline] + fn next(&mut self) -> Option<(SafeHash, K, V)> { + self.iter.next().map(|bucket| { + unsafe { + (**self.table).size -= 1; + let (k, v) = ptr::read(bucket.pair); + (SafeHash { hash: ptr::replace(bucket.hash, EMPTY_BUCKET) }, k, v) + } + }) + } + + fn size_hint(&self) -> (usize, Option) { + let size = unsafe { (**self.table).size() }; + (size, Some(size)) + } +} +impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { + fn len(&self) -> usize { + unsafe { (**self.table).size() } + } +} + +impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> { + fn drop(&mut self) { + for _ in self {} + } +} + +impl Clone for RawTable { + fn clone(&self) -> RawTable { + unsafe { + let mut new_ht = RawTable::new_uninitialized(self.capacity()); + + { + let cap = self.capacity(); + let mut new_buckets = Bucket::first(&mut new_ht); + let mut buckets = Bucket::first(self); + while buckets.index() != cap { + match buckets.peek() { + Full(full) => { + let (h, k, v) = { + let (k, v) = full.read(); + (full.hash(), k.clone(), v.clone()) + }; + *new_buckets.raw.hash = h.inspect(); + ptr::write(new_buckets.raw.pair as *mut (K, V), (k, v)); + } + Empty(..) => { + *new_buckets.raw.hash = EMPTY_BUCKET; + } + } + new_buckets.next(); + buckets.next(); + } + }; + + new_ht.size = self.size(); + + new_ht + } + } +} + +impl Drop for RawTable { + #[unsafe_destructor_blind_to_params] + fn drop(&mut self) { + if self.capacity == 0 { + return; + } + + // This is done in reverse because we've likely partially taken + // some elements out with `.into_iter()` from the front. + // Check if the size is 0, so we don't do a useless scan when + // dropping empty tables such as on resize. + // Also avoid double drop of elements that have been already moved out. 
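+        //
+        // (Note added for clarity: `rev_move_buckets` only yields `self.size`
+        // elements, and `IntoIter`/`Drain` decrement `size` as they move
+        // entries out, so entries already taken are never dropped twice here.)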
+ unsafe { + if needs_drop::<(K, V)>() { + // avoid linear runtime for types that don't need drop + for _ in self.rev_move_buckets() {} + } + } + + let hashes_size = self.capacity * size_of::(); + let pairs_size = self.capacity * size_of::<(K, V)>(); + let (align, _, size, oflo) = calculate_allocation(hashes_size, + align_of::(), + pairs_size, + align_of::<(K, V)>()); + + debug_assert!(!oflo, "should be impossible"); + + unsafe { + deallocate(*self.hashes as *mut u8, size, align); + // Remember how everything was allocated out of one buffer + // during initialization? We only need one call to free here. + } + } +} diff --git a/ctr-std/src/collections/mod.rs b/ctr-std/src/collections/mod.rs index 464ab25..b9e92a0 100644 --- a/ctr-std/src/collections/mod.rs +++ b/ctr-std/src/collections/mod.rs @@ -430,18 +430,17 @@ pub use core_collections::{binary_heap, btree_map, btree_set}; #[stable(feature = "rust1", since = "1.0.0")] pub use core_collections::{linked_list, vec_deque}; -#[cfg(feature = "not_yet_implemented")] +#[stable(feature = "rust1", since = "1.0.0")] pub use self::hash_map::HashMap; -#[cfg(feature = "not_yet_implemented")] +#[stable(feature = "rust1", since = "1.0.0")] pub use self::hash_set::HashSet; #[stable(feature = "rust1", since = "1.0.0")] pub use core_collections::range; -#[cfg(feature = "not_yet_implemented")] mod hash; -#[cfg(feature = "not_yet_implemented")] +#[stable(feature = "rust1", since = "1.0.0")] pub mod hash_map { //! A hash map implementation which uses linear probing with Robin //! Hood bucket stealing. @@ -449,7 +448,7 @@ pub mod hash_map { pub use super::hash::map::*; } -#[cfg(feature = "not_yet_implemented")] +#[stable(feature = "rust1", since = "1.0.0")] pub mod hash_set { //! An implementation of a hash set using the underlying representation of a //! HashMap where the value is (). diff --git a/ctr-std/src/lib.rs b/ctr-std/src/lib.rs index 5b9341a..3724cc4 100644 --- a/ctr-std/src/lib.rs +++ b/ctr-std/src/lib.rs @@ -11,11 +11,13 @@ #![feature(core_intrinsics)] #![feature(char_escape_debug)] #![feature(dropck_eyepatch)] +#![feature(dropck_parametricity)] #![feature(float_extras)] #![feature(fn_traits)] #![feature(fnbox)] #![feature(fused)] #![feature(generic_param_attrs)] +#![feature(heap_api)] #![feature(int_error_internals)] #![feature(integer_atomics)] #![feature(lang_items)] @@ -25,7 +27,9 @@ #![feature(optin_builtin_traits)] #![feature(prelude_import)] #![feature(raw)] +#![feature(rand)] #![feature(shared)] +#![feature(sip_hash_13)] #![feature(slice_concat_ext)] #![feature(slice_patterns)] #![feature(staged_api)] @@ -54,6 +58,7 @@ extern crate core as __core; #[macro_reexport(vec, format)] extern crate collections as core_collections; +#[allow(deprecated)] extern crate rand as core_rand; extern crate alloc; extern crate std_unicode; extern crate alloc_system; @@ -169,12 +174,24 @@ mod sys; // Private support modules mod panicking; +mod rand; mod memchr; // The runtime entry point and a few unstable public functions used by the // compiler pub mod rt; +// Some external utilities of the standard library rely on randomness (aka +// rustc_back::TempDir and tests) and need a way to get at the OS rng we've got +// here. This module is not at all intended for stabilization as-is, however, +// but it may be stabilized long-term. As a result we're exposing a hidden, +// unstable module so we can get our build working. 
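+//
+// A minimal usage sketch (illustrative only, not part of this patch):
+//
+//     #![feature(rand)]
+//     use std::__rand::{thread_rng, Rng};
+//
+//     let roll = thread_rng().gen_range(1, 7); // uniformly 1 through 6
+//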
+#[doc(hidden)] +#[unstable(feature = "rand", issue = "0")] +pub mod __rand { + pub use rand::{thread_rng, ThreadRng, Rng}; +} + // NOTE: These two are "undefined" symbols that LLVM emits but that // we never actually use #[doc(hidden)] diff --git a/ctr-std/src/rand/mod.rs b/ctr-std/src/rand/mod.rs new file mode 100644 index 0000000..b853e83 --- /dev/null +++ b/ctr-std/src/rand/mod.rs @@ -0,0 +1,286 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Utilities for random number generation +//! +//! The key functions are `random()` and `Rng::gen()`. These are polymorphic +//! and so can be used to generate any type that implements `Rand`. Type inference +//! means that often a simple call to `rand::random()` or `rng.gen()` will +//! suffice, but sometimes an annotation is required, e.g. `rand::random::()`. +//! +//! See the `distributions` submodule for sampling random numbers from +//! distributions like normal and exponential. +//! +//! # Thread-local RNG +//! +//! There is built-in support for a RNG associated with each thread stored +//! in thread-local storage. This RNG can be accessed via `thread_rng`, or +//! used implicitly via `random`. This RNG is normally randomly seeded +//! from an operating-system source of randomness, e.g. `/dev/urandom` on +//! Unix systems, and will automatically reseed itself from this source +//! after generating 32 KiB of random data. +//! +//! # Cryptographic security +//! +//! An application that requires an entropy source for cryptographic purposes +//! must use `OsRng`, which reads randomness from the source that the operating +//! system provides (e.g. `/dev/urandom` on Unixes or `CryptGenRandom()` on Windows). +//! The other random number generators provided by this module are not suitable +//! for such purposes. +//! +//! *Note*: many Unix systems provide `/dev/random` as well as `/dev/urandom`. +//! This module uses `/dev/urandom` for the following reasons: +//! +//! - On Linux, `/dev/random` may block if entropy pool is empty; `/dev/urandom` will not block. +//! This does not mean that `/dev/random` provides better output than +//! `/dev/urandom`; the kernel internally runs a cryptographically secure pseudorandom +//! number generator (CSPRNG) based on entropy pool for random number generation, +//! so the "quality" of `/dev/random` is not better than `/dev/urandom` in most cases. +//! However, this means that `/dev/urandom` can yield somewhat predictable randomness +//! if the entropy pool is very small, such as immediately after first booting. +//! Linux 3.17 added the `getrandom(2)` system call which solves the issue: it blocks if entropy +//! pool is not initialized yet, but it does not block once initialized. +//! `getrandom(2)` was based on `getentropy(2)`, an existing system call in OpenBSD. +//! `OsRng` tries to use `getrandom(2)` if available, and use `/dev/urandom` fallback if not. +//! If an application does not have `getrandom` and likely to be run soon after first booting, +//! or on a system with very few entropy sources, one should consider using `/dev/random` via +//! `ReaderRng`. +//! - On some systems (e.g. FreeBSD, OpenBSD and Mac OS X) there is no difference +//! between the two sources. 
(Also note that, on some systems e.g. FreeBSD, both `/dev/random` +//! and `/dev/urandom` may block once if the CSPRNG has not seeded yet.) + +#![unstable(feature = "rand", issue = "0")] + +use cell::RefCell; +use fmt; +use io; +use mem; +use rc::Rc; +use sys; + +#[cfg(target_pointer_width = "32")] +use core_rand::IsaacRng as IsaacWordRng; +#[cfg(target_pointer_width = "64")] +use core_rand::Isaac64Rng as IsaacWordRng; + +pub use core_rand::{Rand, Rng, SeedableRng}; +pub use core_rand::{XorShiftRng, IsaacRng, Isaac64Rng}; +pub use core_rand::reseeding; + +pub mod reader; + +/// The standard RNG. This is designed to be efficient on the current +/// platform. +#[derive(Copy, Clone)] +pub struct StdRng { + rng: IsaacWordRng, +} + +impl StdRng { + /// Create a randomly seeded instance of `StdRng`. + /// + /// This is a very expensive operation as it has to read + /// randomness from the operating system and use this in an + /// expensive seeding operation. If one is only generating a small + /// number of random numbers, or doesn't need the utmost speed for + /// generating each number, `thread_rng` and/or `random` may be more + /// appropriate. + /// + /// Reading the randomness from the OS may fail, and any error is + /// propagated via the `io::Result` return value. + pub fn new() -> io::Result { + OsRng::new().map(|mut r| StdRng { rng: r.gen() }) + } +} + +impl Rng for StdRng { + #[inline] + fn next_u32(&mut self) -> u32 { + self.rng.next_u32() + } + + #[inline] + fn next_u64(&mut self) -> u64 { + self.rng.next_u64() + } +} + +impl<'a> SeedableRng<&'a [usize]> for StdRng { + fn reseed(&mut self, seed: &'a [usize]) { + // the internal RNG can just be seeded from the above + // randomness. + self.rng.reseed(unsafe {mem::transmute(seed)}) + } + + fn from_seed(seed: &'a [usize]) -> StdRng { + StdRng { rng: SeedableRng::from_seed(unsafe {mem::transmute(seed)}) } + } +} + +/// Controls how the thread-local RNG is reseeded. +struct ThreadRngReseeder; + +impl reseeding::Reseeder for ThreadRngReseeder { + fn reseed(&mut self, rng: &mut StdRng) { + *rng = match StdRng::new() { + Ok(r) => r, + Err(e) => panic!("could not reseed thread_rng: {}", e) + } + } +} +const THREAD_RNG_RESEED_THRESHOLD: usize = 32_768; +type ThreadRngInner = reseeding::ReseedingRng; + +/// The thread-local RNG. +#[derive(Clone)] +pub struct ThreadRng { + rng: Rc>, +} + +impl fmt::Debug for ThreadRng { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.pad("ThreadRng { .. }") + } +} + +/// Retrieve the lazily-initialized thread-local random number +/// generator, seeded by the system. Intended to be used in method +/// chaining style, e.g. `thread_rng().gen::()`. +/// +/// The RNG provided will reseed itself from the operating system +/// after generating a certain amount of randomness. +/// +/// The internal RNG used is platform and architecture dependent, even +/// if the operating system random number generator is rigged to give +/// the same sequence always. If absolute consistency is required, +/// explicitly select an RNG, e.g. `IsaacRng` or `Isaac64Rng`. 
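+///
+/// A short usage sketch (added for illustration; inside this crate the module
+/// is `rand`, while external users go through the hidden `std::__rand`
+/// re-export):
+///
+/// ```ignore
+/// use rand::{thread_rng, Rng};
+///
+/// let mut rng = thread_rng();
+/// let n: u32 = rng.gen();
+/// let x: f64 = rng.gen(); // uniform in [0, 1)
+/// ```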
+pub fn thread_rng() -> ThreadRng { + // used to make space in TLS for a random number generator + thread_local!(static THREAD_RNG_KEY: Rc> = { + let r = match StdRng::new() { + Ok(r) => r, + Err(e) => panic!("could not initialize thread_rng: {}", e) + }; + let rng = reseeding::ReseedingRng::new(r, + THREAD_RNG_RESEED_THRESHOLD, + ThreadRngReseeder); + Rc::new(RefCell::new(rng)) + }); + + ThreadRng { rng: THREAD_RNG_KEY.with(|t| t.clone()) } +} + +impl Rng for ThreadRng { + fn next_u32(&mut self) -> u32 { + self.rng.borrow_mut().next_u32() + } + + fn next_u64(&mut self) -> u64 { + self.rng.borrow_mut().next_u64() + } + + #[inline] + fn fill_bytes(&mut self, bytes: &mut [u8]) { + self.rng.borrow_mut().fill_bytes(bytes) + } +} + +/// A random number generator that retrieves randomness straight from +/// the operating system. Platform sources: +/// +/// - Unix-like systems (Linux, Android, Mac OSX): read directly from +/// `/dev/urandom`, or from `getrandom(2)` system call if available. +/// - Windows: calls `CryptGenRandom`, using the default cryptographic +/// service provider with the `PROV_RSA_FULL` type. +/// - iOS: calls SecRandomCopyBytes as /dev/(u)random is sandboxed. +/// - OpenBSD: uses the `getentropy(2)` system call. +/// +/// This does not block. +pub struct OsRng(sys::rand::OsRng); + +impl OsRng { + /// Create a new `OsRng`. + pub fn new() -> io::Result { + sys::rand::OsRng::new().map(OsRng) + } +} + +impl Rng for OsRng { + #[inline] + fn next_u32(&mut self) -> u32 { + self.0.next_u32() + } + + #[inline] + fn next_u64(&mut self) -> u64 { + self.0.next_u64() + } + + #[inline] + fn fill_bytes(&mut self, bytes: &mut [u8]) { + self.0.fill_bytes(bytes) + } +} + + +#[cfg(test)] +mod tests { + use sync::mpsc::channel; + use rand::Rng; + use super::OsRng; + use thread; + + #[test] + fn test_os_rng() { + let mut r = OsRng::new().unwrap(); + + r.next_u32(); + r.next_u64(); + + let mut v = [0; 1000]; + r.fill_bytes(&mut v); + } + + #[test] + #[cfg_attr(target_os = "emscripten", ignore)] + fn test_os_rng_tasks() { + + let mut txs = vec![]; + for _ in 0..20 { + let (tx, rx) = channel(); + txs.push(tx); + + thread::spawn(move|| { + // wait until all the threads are ready to go. + rx.recv().unwrap(); + + // deschedule to attempt to interleave things as much + // as possible (XXX: is this a good test?) + let mut r = OsRng::new().unwrap(); + thread::yield_now(); + let mut v = [0; 1000]; + + for _ in 0..100 { + r.next_u32(); + thread::yield_now(); + r.next_u64(); + thread::yield_now(); + r.fill_bytes(&mut v); + thread::yield_now(); + } + }); + } + + // start all the threads + for tx in &txs { + tx.send(()).unwrap(); + } + } +} diff --git a/ctr-std/src/rand/reader.rs b/ctr-std/src/rand/reader.rs new file mode 100644 index 0000000..08bc809 --- /dev/null +++ b/ctr-std/src/rand/reader.rs @@ -0,0 +1,108 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! A wrapper around any Read to treat it as an RNG. + +#![allow(dead_code)] + +use io::prelude::*; +use rand::Rng; + +/// An RNG that reads random bytes straight from a `Read`. This will +/// work best with an infinite reader, but this is not required. 
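+///
+/// A sketch of typical use (illustrative; mirrors the tests below): any `Read`
+/// implementor works, including a plain byte slice.
+///
+/// ```ignore
+/// use rand::Rng;
+/// use rand::reader::ReaderRng;
+///
+/// let bytes = [0u8, 0, 0, 1, 0, 0, 0, 2];
+/// let mut rng = ReaderRng::new(&bytes[..]);
+/// assert_eq!(rng.next_u32(), 1u32.to_be());
+/// ```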
+/// +/// # Panics +/// +/// It will panic if it there is insufficient data to fulfill a request. +pub struct ReaderRng { + reader: R +} + +impl ReaderRng { + /// Create a new `ReaderRng` from a `Read`. + pub fn new(r: R) -> ReaderRng { + ReaderRng { + reader: r + } + } +} + +impl Rng for ReaderRng { + fn next_u32(&mut self) -> u32 { + // This is designed for speed: reading a LE integer on a LE + // platform just involves blitting the bytes into the memory + // of the u32, similarly for BE on BE; avoiding byteswapping. + let mut bytes = [0; 4]; + self.fill_bytes(&mut bytes); + unsafe { *(bytes.as_ptr() as *const u32) } + } + fn next_u64(&mut self) -> u64 { + // see above for explanation. + let mut bytes = [0; 8]; + self.fill_bytes(&mut bytes); + unsafe { *(bytes.as_ptr() as *const u64) } + } + fn fill_bytes(&mut self, mut v: &mut [u8]) { + while !v.is_empty() { + let t = v; + match self.reader.read(t) { + Ok(0) => panic!("ReaderRng.fill_bytes: EOF reached"), + Ok(n) => v = t.split_at_mut(n).1, + Err(e) => panic!("ReaderRng.fill_bytes: {}", e), + } + } + } +} + +#[cfg(test)] +mod tests { + use super::ReaderRng; + use rand::Rng; + + #[test] + fn test_reader_rng_u64() { + // transmute from the target to avoid endianness concerns. + let v = &[0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 3][..]; + let mut rng = ReaderRng::new(v); + + assert_eq!(rng.next_u64(), 1u64.to_be()); + assert_eq!(rng.next_u64(), 2u64.to_be()); + assert_eq!(rng.next_u64(), 3u64.to_be()); + } + #[test] + fn test_reader_rng_u32() { + let v = &[0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3][..]; + let mut rng = ReaderRng::new(v); + + assert_eq!(rng.next_u32(), 1u32.to_be()); + assert_eq!(rng.next_u32(), 2u32.to_be()); + assert_eq!(rng.next_u32(), 3u32.to_be()); + } + #[test] + fn test_reader_rng_fill_bytes() { + let v = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut w = [0; 8]; + + let mut rng = ReaderRng::new(&v[..]); + rng.fill_bytes(&mut w); + + assert!(v == w); + } + + #[test] + #[should_panic] + fn test_reader_rng_insufficient_bytes() { + let mut rng = ReaderRng::new(&[][..]); + let mut v = [0; 3]; + rng.fill_bytes(&mut v); + } +} diff --git a/ctr-std/src/sys/unix/mod.rs b/ctr-std/src/sys/unix/mod.rs index 5e44d34..4ac7a22 100644 --- a/ctr-std/src/sys/unix/mod.rs +++ b/ctr-std/src/sys/unix/mod.rs @@ -26,6 +26,7 @@ pub mod os_str; pub mod path; pub mod rwlock; pub mod thread; +pub mod rand; pub mod thread_local; pub mod time; diff --git a/ctr-std/src/sys/unix/rand.rs b/ctr-std/src/sys/unix/rand.rs new file mode 100644 index 0000000..7fdc166 --- /dev/null +++ b/ctr-std/src/sys/unix/rand.rs @@ -0,0 +1,54 @@ +// Copyright 2013-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
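+
+// 3DS note (added for clarity): unlike other Unix-like targets, randomness
+// here comes from the SSLC system service (`sslcGenerateRandomData`) exposed
+// by libctru, so no `/dev/urandom` device is involved.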
+
+use io::{self, Error, ErrorKind};
+use mem;
+use rand::Rng;
+
+use libctru::services::sslc::{sslcInit, sslcExit, sslcGenerateRandomData};
+
+pub struct OsRng(());
+
+impl OsRng {
+    pub fn new() -> io::Result<OsRng> {
+        unsafe {
+            let r = sslcInit(0);
+            if r < 0 {
+                Err(Error::new(ErrorKind::Other, "Unable to initialize the RNG"))
+            } else {
+                Ok(OsRng(()))
+            }
+        }
+    }
+}
+
+impl Rng for OsRng {
+    fn next_u32(&mut self) -> u32 {
+        let mut v = [0; 4];
+        self.fill_bytes(&mut v);
+        unsafe { mem::transmute(v) }
+    }
+
+    fn next_u64(&mut self) -> u64 {
+        let mut v = [0; 8];
+        self.fill_bytes(&mut v);
+        unsafe { mem::transmute(v) }
+    }
+
+    fn fill_bytes(&mut self, v: &mut [u8]) {
+        unsafe { sslcGenerateRandomData(v.as_ptr() as _, v.len() as u32); }
+    }
+}
+
+impl Drop for OsRng {
+    fn drop(&mut self) {
+        unsafe { sslcExit() }
+    }
+}
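+
+// Illustrative smoke test (added as a sketch, not part of the original patch);
+// it assumes the sslc service is reachable when the test binary runs on the
+// console.
+#[cfg(test)]
+mod tests {
+    use super::OsRng;
+    use rand::Rng;
+
+    #[test]
+    fn test_fill_bytes() {
+        let mut r = OsRng::new().unwrap();
+        let mut buf = [0u8; 16];
+        r.fill_bytes(&mut buf);
+        // next_u32/next_u64 should also succeed without panicking.
+        r.next_u32();
+        r.next_u64();
+    }
+}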