2 An OrderedSet is a custom MutableSet that remembers its order, so that every
3 entry has an index that can be looked up.
5 Based on a recipe originally posted to ActiveState Recipes by Raymond Hettiger,
6 and released under the MIT license.
9 from collections
import deque
13 from collections
.abc
import MutableSet
, Sequence
16 from collections
import MutableSet
, Sequence
18 SLICE_ALL
= slice(None)
24 Are we being asked to look up a list of things, instead of a single thing?
25 We check for the `__iter__` attribute so that this can cover types that
26 don't have to be known by this module, such as NumPy arrays.
28 Strings, however, should be considered as atomic values to look up, not
29 iterables. The same goes for tuples, since they are immutable and therefore
32 We don't need to check for the Python 2 `unicode` type, because it doesn't
33 have an `__iter__` attribute anyway.
36 hasattr(obj
, "__iter__")
37 and not isinstance(obj
, str)
38 and not isinstance(obj
, tuple)
42 class OrderedSet(MutableSet
, Sequence
):
44 An OrderedSet is a custom MutableSet that remembers its order, so that
45 every entry has an index that can be looked up.
48 >>> OrderedSet([1, 1, 2, 3, 2])
52 def __init__(self
, iterable
=None):
55 if iterable
is not None:
60 Returns the number of unique elements in the ordered set
63 >>> len(OrderedSet([]))
65 >>> len(OrderedSet([1, 2]))
68 return len(self
.items
)
70 def __getitem__(self
, index
):
72 Get the item at a given index.
74 If `index` is a slice, you will get back that slice of items, as a
77 If `index` is a list or a similar iterable, you'll get a list of
78 items corresponding to those indices. This is similar to NumPy's
79 "fancy indexing". The result is not an OrderedSet because you may ask
80 for duplicate indices, and the number of elements returned should be
81 the number of elements asked for.
84 >>> oset = OrderedSet([1, 2, 3])
88 if isinstance(index
, slice) and index
== SLICE_ALL
:
90 elif is_iterable(index
):
91 return [self
.items
[i
] for i
in index
]
92 elif hasattr(index
, "__index__") or isinstance(index
, slice):
93 result
= self
.items
[index
]
94 if isinstance(result
, list):
95 return self
.__class
__(result
)
99 raise TypeError("Don't know how to index an OrderedSet by %r" % index
)
103 Return a shallow copy of this object.
106 >>> this = OrderedSet([1, 2, 3])
107 >>> other = this.copy()
113 return self
.__class
__(self
)
115 def __getstate__(self
):
117 # The state can't be an empty list.
118 # We need to return a truthy value, or else __setstate__ won't be run.
120 # This could have been done more gracefully by always putting the state
121 # in a tuple, but this way is backwards- and forwards- compatible with
122 # previous versions of OrderedSet.
127 def __setstate__(self
, state
):
133 def __contains__(self
, key
):
135 Test if the item is in this ordered set
138 >>> 1 in OrderedSet([1, 3, 2])
140 >>> 5 in OrderedSet([1, 3, 2])
143 return key
in self
.map
147 Add `key` as an item to this OrderedSet, then return its index.
149 If `key` is already in the OrderedSet, return the index it already
153 >>> oset = OrderedSet()
159 if key
not in self
.map:
160 self
.map[key
] = len(self
.items
)
161 self
.items
.append(key
)
166 def update(self
, sequence
):
168 Update the set with the given iterable sequence, then return the index
169 of the last element inserted.
172 >>> oset = OrderedSet([1, 2, 3])
173 >>> oset.update([3, 1, 5, 1, 4])
176 OrderedSet([1, 2, 3, 5, 4])
180 for item
in sequence
:
181 item_index
= self
.add(item
)
184 "Argument needs to be an iterable, got %s" % type(sequence
)
188 def index(self
, key
):
190 Get the index of a given entry, raising an IndexError if it's not
193 `key` can be an iterable of entries that is not a string, in which case
194 this returns a list of indices.
197 >>> oset = OrderedSet([1, 2, 3])
202 return [self
.index(subkey
) for subkey
in key
]
205 # Provide some compatibility with pd.Index
211 Remove and return the last element from the set.
213 Raises KeyError if the set is empty.
216 >>> oset = OrderedSet([1, 2, 3])
221 raise KeyError("Set is empty")
223 elem
= self
.items
[-1]
228 def discard(self
, key
):
230 Remove an element. Do not raise an exception if absent.
232 The MutableSet mixin uses this to implement the .remove() method, which
233 *does* raise an error when asked to remove a non-existent item.
236 >>> oset = OrderedSet([1, 2, 3])
248 for k
, v
in self
.map.items():
254 Remove all items from this OrderedSet.
262 >>> list(iter(OrderedSet([1, 2, 3])))
265 return iter(self
.items
)
267 def __reversed__(self
):
270 >>> list(reversed(OrderedSet([1, 2, 3])))
273 return reversed(self
.items
)
277 return "%s()" % (self
.__class
__.__name
__,)
278 return "%s(%r)" % (self
.__class
__.__name
__, list(self
))
280 def __eq__(self
, other
):
282 Returns true if the containers have the same items. If `other` is a
283 Sequence, then order is checked, otherwise it is ignored.
286 >>> oset = OrderedSet([1, 3, 2])
287 >>> oset == [1, 3, 2]
289 >>> oset == [1, 2, 3]
293 >>> oset == OrderedSet([3, 2, 1])
296 # In Python 2 deque is not a Sequence, so treat it as one for
297 # consistent behavior with Python 3.
298 if isinstance(other
, (Sequence
, deque
)):
299 # Check that this OrderedSet contains the same elements, in the
300 # same order, as the other object.
301 return list(self
) == list(other
)
303 other_as_set
= set(other
)
305 # If `other` can't be converted into a set, it's not equal.
308 return set(self
) == other_as_set
310 def union(self
, *sets
):
312 Combines all unique items.
313 Each items order is defined by its first appearance.
316 >>> oset = OrderedSet.union(OrderedSet([3, 1, 4, 1, 5]), [1, 3], [2, 0])
318 OrderedSet([3, 1, 4, 5, 2, 0])
319 >>> oset.union([8, 9])
320 OrderedSet([3, 1, 4, 5, 2, 0, 8, 9])
322 OrderedSet([3, 1, 4, 5, 2, 0, 10])
324 cls
= self
.__class
__ if isinstance(self
, OrderedSet
) else OrderedSet
325 containers
= map(list, it
.chain([self
], sets
))
326 items
= it
.chain
.from_iterable(containers
)
329 def __and__(self
, other
):
330 # the parent implementation of this is backwards
331 return self
.intersection(other
)
333 def intersection(self
, *sets
):
335 Returns elements in common between all sets. Order is defined only
339 >>> oset = OrderedSet.intersection(OrderedSet([0, 1, 2, 3]), [1, 2, 3])
341 OrderedSet([1, 2, 3])
342 >>> oset.intersection([2, 4, 5], [1, 2, 3, 4])
344 >>> oset.intersection()
345 OrderedSet([1, 2, 3])
347 cls
= self
.__class
__ if isinstance(self
, OrderedSet
) else OrderedSet
349 common
= set.intersection(*map(set, sets
))
350 items
= (item
for item
in self
if item
in common
)
355 def difference(self
, *sets
):
357 Returns all elements that are in this set but not the others.
360 >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]))
362 >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]), OrderedSet([3]))
364 >>> OrderedSet([1, 2, 3]) - OrderedSet([2])
366 >>> OrderedSet([1, 2, 3]).difference()
367 OrderedSet([1, 2, 3])
371 other
= set.union(*map(set, sets
))
372 items
= (item
for item
in self
if item
not in other
)
377 def issubset(self
, other
):
379 Report whether another set contains this set.
382 >>> OrderedSet([1, 2, 3]).issubset({1, 2})
384 >>> OrderedSet([1, 2, 3]).issubset({1, 2, 3, 4})
386 >>> OrderedSet([1, 2, 3]).issubset({1, 4, 3, 5})
389 if len(self
) > len(other
): # Fast check for obvious cases
391 return all(item
in other
for item
in self
)
393 def issuperset(self
, other
):
395 Report whether this set contains another set.
398 >>> OrderedSet([1, 2]).issuperset([1, 2, 3])
400 >>> OrderedSet([1, 2, 3, 4]).issuperset({1, 2, 3})
402 >>> OrderedSet([1, 4, 3, 5]).issuperset({1, 2, 3})
405 if len(self
) < len(other
): # Fast check for obvious cases
407 return all(item
in self
for item
in other
)
409 def symmetric_difference(self
, other
):
411 Return the symmetric difference of two OrderedSets as a new set.
412 That is, the new set will contain all elements that are in exactly
415 Their order will be preserved, with elements from `self` preceding
416 elements from `other`.
419 >>> this = OrderedSet([1, 4, 3, 5, 7])
420 >>> other = OrderedSet([9, 7, 1, 3, 2])
421 >>> this.symmetric_difference(other)
422 OrderedSet([4, 5, 9, 2])
424 cls
= self
.__class
__ if isinstance(self
, OrderedSet
) else OrderedSet
425 diff1
= cls(self
).difference(other
)
426 diff2
= cls(other
).difference(self
)
427 return diff1
.union(diff2
)
429 def _update_items(self
, items
):
431 Replace the 'items' list of this OrderedSet with a new one, updating
432 self.map accordingly.
435 self
.map = {item: idx for (idx, item) in enumerate(items)}
437 def difference_update(self
, *sets
):
439 Update this OrderedSet to remove items from one or more other sets.
442 >>> this = OrderedSet([1, 2, 3])
443 >>> this.difference_update(OrderedSet([2, 4]))
447 >>> this = OrderedSet([1, 2, 3, 4, 5])
448 >>> this.difference_update(OrderedSet([2, 4]), OrderedSet([1, 4, 6]))
452 items_to_remove
= set()
454 items_to_remove |
= set(other
)
455 self
._update
_items
([item
for item
in self
.items
if item
not in items_to_remove
])
457 def intersection_update(self
, other
):
459 Update this OrderedSet to keep only items in another set, preserving
460 their order in this set.
463 >>> this = OrderedSet([1, 4, 3, 5, 7])
464 >>> other = OrderedSet([9, 7, 1, 3, 2])
465 >>> this.intersection_update(other)
467 OrderedSet([1, 3, 7])
470 self
._update
_items
([item
for item
in self
.items
if item
in other
])
472 def symmetric_difference_update(self
, other
):
474 Update this OrderedSet to remove items from another set, then
475 add items from the other set that were not present in this set.
478 >>> this = OrderedSet([1, 4, 3, 5, 7])
479 >>> other = OrderedSet([9, 7, 1, 3, 2])
480 >>> this.symmetric_difference_update(other)
482 OrderedSet([4, 5, 9, 2])
484 items_to_add
= [item
for item
in other
if item
not in self
]
485 items_to_remove
= set(other
)
487 [item
for item
in self
.items
if item
not in items_to_remove
] + items_to_add