[pypy-commit] pypy py3.5: hg merge dict-move-to-end
arigo
pypy.commits at gmail.com
Sun Feb 5 05:08:41 EST 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5
Changeset: r89934:5e325b5c62d9
Date: 2017-02-05 08:29 +0100
http://bitbucket.org/pypy/pypy/changeset/5e325b5c62d9/
Log: hg merge dict-move-to-end
diff --git a/pypy/doc/stackless.rst b/pypy/doc/stackless.rst
--- a/pypy/doc/stackless.rst
+++ b/pypy/doc/stackless.rst
@@ -190,7 +190,7 @@
from GC issues: if the program "forgets" an unfinished greenlet, it will
always be collected at the next garbage collection.
-.. _documentation of the greenlets: http://packages.python.org/greenlet/
+.. _documentation of the greenlets: https://greenlet.readthedocs.io/
Unimplemented features
diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py
--- a/rpython/annotator/unaryop.py
+++ b/rpython/annotator/unaryop.py
@@ -14,7 +14,7 @@
SomeUnicodeCodePoint, SomeInstance, SomeBuiltin, SomeBuiltinMethod,
SomeFloat, SomeIterator, SomePBC, SomeNone, SomeTypeOf, s_ImpossibleValue,
s_Bool, s_None, s_Int, unionof, add_knowntypedata,
- SomeWeakRef, SomeUnicodeString, SomeByteArray)
+ SomeWeakRef, SomeUnicodeString, SomeByteArray, SomeOrderedDict)
from rpython.annotator.bookkeeper import getbookkeeper, immutablevalue
from rpython.annotator.binaryop import _clone ## XXX where to put this?
from rpython.annotator.binaryop import _dict_can_only_throw_keyerror
@@ -575,6 +575,13 @@
pair(self, s_key).delitem()
method_delitem_with_hash.can_only_throw = _dict_can_only_throw_keyerror
+class __extend__(SomeOrderedDict):
+
+ def method_move_to_end(self, s_key, s_last):
+ assert s_Bool.contains(s_last)
+ pair(self, s_key).delitem()
+ method_move_to_end.can_only_throw = _dict_can_only_throw_keyerror
+
@op.contains.register(SomeString)
@op.contains.register(SomeUnicodeString)
def contains_String(annotator, string, char):
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -934,6 +934,24 @@
return
d.delitem_with_hash(key, h)
+def _untranslated_move_to_end(d, key, last):
+ "NOT_RPYTHON"
+ value = d.pop(key)
+ if last:
+ d[key] = value
+ else:
+ items = d.items()
+ d.clear()
+ d[key] = value
+ d.update(items)
+
+ at specialize.call_location()
+def move_to_end(d, key, last=True):
+ if not we_are_translated():
+ _untranslated_move_to_end(d, key, last)
+ return
+ d.move_to_end(key, last)
+
# ____________________________________________________________
def import_from_mixin(M, special_methods=['__init__', '__del__']):
diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py
--- a/rpython/rlib/test/test_objectmodel.py
+++ b/rpython/rlib/test/test_objectmodel.py
@@ -7,7 +7,7 @@
resizelist_hint, is_annotation_constant, always_inline, NOT_CONSTANT,
iterkeys_with_hash, iteritems_with_hash, contains_with_hash,
setitem_with_hash, getitem_with_hash, delitem_with_hash, import_from_mixin,
- fetch_translated_config, try_inline)
+ fetch_translated_config, try_inline, move_to_end)
from rpython.translator.translator import TranslationContext, graphof
from rpython.rtyper.test.tool import BaseRtypingTest
from rpython.rtyper.test.test_llinterp import interpret
@@ -679,6 +679,16 @@
assert f(29) == 0
interpret(f, [27])
+def test_rordereddict_move_to_end():
+ d = OrderedDict()
+ d['key1'] = 'val1'
+ d['key2'] = 'val2'
+ d['key3'] = 'val3'
+ move_to_end(d, 'key1')
+ assert d.items() == [('key2', 'val2'), ('key3', 'val3'), ('key1', 'val1')]
+ move_to_end(d, 'key1', last=False)
+ assert d.items() == [('key1', 'val1'), ('key2', 'val2'), ('key3', 'val3')]
+
def test_import_from_mixin():
class M: # old-style
def f(self):
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -407,6 +407,15 @@
hop.exception_is_here()
hop.gendirectcall(ll_dict_delitem_with_hash, v_dict, v_key, v_hash)
+ def rtype_method_move_to_end(self, hop):
+ v_dict, v_key, v_last = hop.inputargs(
+ self, self.key_repr, lltype.Bool)
+ if not self.custom_eq_hash:
+ hop.has_implicit_exception(KeyError) # record that we know about it
+ hop.exception_is_here()
+ hop.gendirectcall(ll_dict_move_to_end, v_dict, v_key, v_last)
+
+
class __extend__(pairtype(OrderedDictRepr, rmodel.Repr)):
def rtype_getitem((r_dict, r_key), hop):
@@ -542,16 +551,18 @@
ll_assert(False, "ll_call_insert_clean_function(): invalid lookup_fun")
assert False
-def ll_call_delete_by_entry_index(d, hash, i):
+def ll_call_delete_by_entry_index(d, hash, i, replace_with):
+ # only called from _ll_dict_del, whose @jit.look_inside_iff
+ # condition should control when we get inside here with the jit
fun = d.lookup_function_no & FUNC_MASK
if fun == FUNC_BYTE:
- ll_dict_delete_by_entry_index(d, hash, i, TYPE_BYTE)
+ ll_dict_delete_by_entry_index(d, hash, i, replace_with, TYPE_BYTE)
elif fun == FUNC_SHORT:
- ll_dict_delete_by_entry_index(d, hash, i, TYPE_SHORT)
+ ll_dict_delete_by_entry_index(d, hash, i, replace_with, TYPE_SHORT)
elif IS_64BIT and fun == FUNC_INT:
- ll_dict_delete_by_entry_index(d, hash, i, TYPE_INT)
+ ll_dict_delete_by_entry_index(d, hash, i, replace_with, TYPE_INT)
elif fun == FUNC_LONG:
- ll_dict_delete_by_entry_index(d, hash, i, TYPE_LONG)
+ ll_dict_delete_by_entry_index(d, hash, i, replace_with, TYPE_LONG)
else:
# can't be still FUNC_MUST_REINDEX here
ll_assert(False, "ll_call_delete_by_entry_index(): invalid lookup_fun")
@@ -805,13 +816,12 @@
ll_dict_delitem_with_hash(d, key, d.keyhash(key))
def ll_dict_delitem_with_hash(d, key, hash):
- index = d.lookup_function(d, key, hash, FLAG_DELETE)
+ index = d.lookup_function(d, key, hash, FLAG_LOOKUP)
if index < 0:
raise KeyError
- _ll_dict_del(d, index)
+ _ll_dict_del(d, hash, index)
- at jit.look_inside_iff(lambda d, i: jit.isvirtual(d) and jit.isconstant(i))
-def _ll_dict_del(d, index):
+def _ll_dict_del_entry(d, index):
d.entries.mark_deleted(index)
d.num_live_items -= 1
# clear the key and the value if they are GC pointers
@@ -823,6 +833,11 @@
if ENTRIES.must_clear_value:
entry.value = lltype.nullptr(ENTRY.value.TO)
+ at jit.look_inside_iff(lambda d, h, i: jit.isvirtual(d) and jit.isconstant(i))
+def _ll_dict_del(d, hash, index):
+ ll_call_delete_by_entry_index(d, hash, index, DELETED)
+ _ll_dict_del_entry(d, index)
+
if d.num_live_items == 0:
# Dict is now empty. Reset these fields.
d.num_ever_used_items = 0
@@ -963,7 +978,6 @@
FLAG_LOOKUP = 0
FLAG_STORE = 1
-FLAG_DELETE = 2
@specialize.memo()
def _ll_ptr_to_array_of(T):
@@ -985,8 +999,6 @@
if index >= VALID_OFFSET:
checkingkey = entries[index - VALID_OFFSET].key
if direct_compare and checkingkey == key:
- if store_flag == FLAG_DELETE:
- indexes[i] = rffi.cast(T, DELETED)
return index - VALID_OFFSET # found the entry
if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash:
# correct hash, maybe the key is e.g. a different pointer to
@@ -1000,8 +1012,6 @@
# the compare did major nasty stuff to the dict: start over
return ll_dict_lookup(d, key, hash, store_flag, T)
if found:
- if store_flag == FLAG_DELETE:
- indexes[i] = rffi.cast(T, DELETED)
return index - VALID_OFFSET
deletedslot = -1
elif index == DELETED:
@@ -1030,8 +1040,6 @@
elif index >= VALID_OFFSET:
checkingkey = entries[index - VALID_OFFSET].key
if direct_compare and checkingkey == key:
- if store_flag == FLAG_DELETE:
- indexes[i] = rffi.cast(T, DELETED)
return index - VALID_OFFSET # found the entry
if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash:
# correct hash, maybe the key is e.g. a different pointer to
@@ -1044,8 +1052,6 @@
# the compare did major nasty stuff to the dict: start over
return ll_dict_lookup(d, key, hash, store_flag, T)
if found:
- if store_flag == FLAG_DELETE:
- indexes[i] = rffi.cast(T, DELETED)
return index - VALID_OFFSET
elif deletedslot == -1:
deletedslot = intmask(i)
@@ -1066,7 +1072,11 @@
perturb >>= PERTURB_SHIFT
indexes[i] = rffi.cast(T, index + VALID_OFFSET)
-def ll_dict_delete_by_entry_index(d, hash, locate_index, T):
+# the following function is only called from _ll_dict_del, whose
+# @jit.look_inside_iff condition should control when we get inside
+# here with the jit
+ at jit.unroll_safe
+def ll_dict_delete_by_entry_index(d, hash, locate_index, replace_with, T):
# Another simplified version of ll_dict_lookup() which locates a
# hashtable entry with the given 'index' stored in it, and deletes it.
# This *should* be safe against evil user-level __eq__/__hash__
@@ -1083,7 +1093,7 @@
i = (i << 2) + i + perturb + 1
i = i & mask
perturb >>= PERTURB_SHIFT
- indexes[i] = rffi.cast(T, DELETED)
+ indexes[i] = rffi.cast(T, replace_with)
# ____________________________________________________________
#
@@ -1253,18 +1263,8 @@
if hasattr(DICT, 'fnkeyhash'):
newdict.fnkeyhash = dict.fnkeyhash
- i = 0
- while i < newdict.num_ever_used_items:
- d_entry = newdict.entries[i]
- entry = dict.entries[i]
- ENTRY = lltype.typeOf(newdict.entries).TO.OF
- d_entry.key = entry.key
- if hasattr(ENTRY, 'f_valid'):
- d_entry.f_valid = entry.f_valid
- d_entry.value = entry.value
- if hasattr(ENTRY, 'f_hash'):
- d_entry.f_hash = entry.f_hash
- i += 1
+ rgc.ll_arraycopy(dict.entries, newdict.entries, 0, 0,
+ newdict.num_ever_used_items)
ll_dict_reindex(newdict, _ll_len_of_d_indexes(dict))
return newdict
@@ -1390,8 +1390,6 @@
break
dic.num_ever_used_items -= 1
- # we must remove the precise entry in the hashtable that points to 'i'
- ll_call_delete_by_entry_index(dic, entries.hash(i), i)
return i
def ll_dict_popitem(ELEM, dic):
@@ -1400,21 +1398,139 @@
r = lltype.malloc(ELEM.TO)
r.item0 = recast(ELEM.TO.item0, entry.key)
r.item1 = recast(ELEM.TO.item1, entry.value)
- _ll_dict_del(dic, i)
+ _ll_dict_del(dic, dic.entries.hash(i), i)
return r
def ll_dict_pop(dic, key):
- index = dic.lookup_function(dic, key, dic.keyhash(key), FLAG_DELETE)
+ hash = dic.keyhash(key)
+ index = dic.lookup_function(dic, key, hash, FLAG_LOOKUP)
if index < 0:
raise KeyError
value = dic.entries[index].value
- _ll_dict_del(dic, index)
+ _ll_dict_del(dic, hash, index)
return value
def ll_dict_pop_default(dic, key, dfl):
- index = dic.lookup_function(dic, key, dic.keyhash(key), FLAG_DELETE)
+ hash = dic.keyhash(key)
+ index = dic.lookup_function(dic, key, hash, FLAG_LOOKUP)
if index < 0:
return dfl
value = dic.entries[index].value
- _ll_dict_del(dic, index)
+ _ll_dict_del(dic, hash, index)
return value
+
+def ll_dict_move_to_end(d, key, last):
+ if last:
+ ll_dict_move_to_last(d, key)
+ else:
+ ll_dict_move_to_first(d, key)
+
+def ll_dict_move_to_last(d, key):
+ hash = d.keyhash(key)
+ old_index = d.lookup_function(d, key, hash, FLAG_LOOKUP)
+ if old_index < 0:
+ raise KeyError
+
+ if old_index == d.num_ever_used_items - 1:
+ return
+
+ # remove the entry at the old position
+ old_entry = d.entries[old_index]
+ key = old_entry.key
+ value = old_entry.value
+ _ll_dict_del_entry(d, old_index)
+
+ # note a corner case: it is possible that 'replace_with' is just too
+ # large to fit in the type T used so far for the index. But in that
+ # case, the list 'd.entries' is full, and the following call to
+ # _ll_dict_setitem_lookup_done() will necessarily reindex the dict.
+ # So in that case, this value of 'replace_with' should be ignored.
+ ll_call_delete_by_entry_index(d, hash, old_index,
+ replace_with = VALID_OFFSET + d.num_ever_used_items)
+ _ll_dict_setitem_lookup_done(d, key, value, hash, -1)
+
+def ll_dict_move_to_first(d, key):
+ # In this function, we might do a bit more than the strict minimum
+ # of walks over parts of the array, trying to keep the code at least
+ # semi-reasonable, while the goal is still amortized constant-time
+ # over many calls.
+
+ # Call ll_dict_remove_deleted_items() first if there are too many
+ # deleted items. Not a perfect solution, because lookup_function()
+ # might do random things with the dict and create many new deleted
+ # items. Still, should be fine, because nothing crucially depends
+ # on this: the goal is to avoid the dictionary's list growing
+ # forever.
+ if d.num_live_items < len(d.entries) // 2 - 16:
+ ll_dict_remove_deleted_items(d)
+
+ hash = d.keyhash(key)
+ old_index = d.lookup_function(d, key, hash, FLAG_LOOKUP)
+ if old_index <= 0:
+ if old_index < 0:
+ raise KeyError
+ else:
+ return
+
+ # the goal of the following is to set 'idst' to the number of
+ # deleted entries at the beginning, ensuring 'idst > 0'
+ must_reindex = False
+ if d.entries.valid(0):
+ # the first entry is valid, so we need to make room before.
+ new_allocated = _overallocate_entries_len(d.num_ever_used_items)
+ idst = ((new_allocated - d.num_ever_used_items) * 3) // 4
+ ll_assert(idst > 0, "overallocate did not do enough")
+ newitems = lltype.malloc(lltype.typeOf(d).TO.entries.TO, new_allocated)
+ rgc.ll_arraycopy(d.entries, newitems, 0, idst, d.num_ever_used_items)
+ d.entries = newitems
+ i = 0
+ while i < idst:
+ d.entries.mark_deleted(i)
+ i += 1
+ d.num_ever_used_items += idst
+ old_index += idst
+ must_reindex = True
+ idst -= 1
+ else:
+ idst = d.lookup_function_no >> FUNC_SHIFT
+ # All entries in range(0, idst) are deleted. Check if more are
+ while not d.entries.valid(idst):
+ idst += 1
+ if idst == old_index:
+ d.lookup_function_no = ((d.lookup_function_no & FUNC_MASK) |
+ (old_index << FUNC_SHIFT))
+ return
+ idst -= 1
+ d.lookup_function_no = ((d.lookup_function_no & FUNC_MASK) |
+ (idst << FUNC_SHIFT))
+
+ # remove the entry at the old position
+ ll_assert(d.entries.valid(old_index),
+ "ll_dict_move_to_first: lost old_index")
+ ENTRY = lltype.typeOf(d.entries).TO.OF
+ old_entry = d.entries[old_index]
+ key = old_entry.key
+ value = old_entry.value
+ if hasattr(ENTRY, 'f_hash'):
+ ll_assert(old_entry.f_hash == hash,
+ "ll_dict_move_to_first: bad hash")
+ _ll_dict_del_entry(d, old_index)
+
+ # put the entry at its new position
+ ll_assert(not d.entries.valid(idst),
+ "ll_dict_move_to_first: overwriting idst")
+ new_entry = d.entries[idst]
+ new_entry.key = key
+ new_entry.value = value
+ if hasattr(ENTRY, 'f_hash'):
+ new_entry.f_hash = hash
+ if hasattr(ENTRY, 'f_valid'):
+ new_entry.f_valid = True
+ d.num_live_items += 1
+
+ # fix the index
+ if must_reindex:
+ ll_dict_reindex(d, _ll_len_of_d_indexes(d))
+ else:
+ ll_call_delete_by_entry_index(d, hash, old_index,
+ replace_with = VALID_OFFSET + idst)
diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py
--- a/rpython/rtyper/test/test_rdict.py
+++ b/rpython/rtyper/test/test_rdict.py
@@ -1188,6 +1188,12 @@
assert not self.ll_contains(self.l_dict, ll_key)
self.removed_keys.append(key)
+ def move_to_end(self, key, last=True):
+ "For test_rordereddict"
+
+ def move_to_first(self, key):
+ self.move_to_end(key, last=False)
+
def copydict(self):
self.l_dict = self.ll_copy(self.l_dict)
assert self.ll_len(self.l_dict) == len(self.reference)
@@ -1250,6 +1256,15 @@
return builds(Action,
just('delitem'), tuples(sampled_from(self.space.reference)))
+ def st_move_to_end(self):
+ return builds(Action,
+ just('move_to_end'), tuples(sampled_from(self.space.reference)))
+
+ def st_move_to_first(self):
+ return builds(Action,
+ just('move_to_first'),
+ tuples(sampled_from(self.space.reference)))
+
def steps(self):
if not self.space:
return builds(Action, just('setup'), tuples(st_keys, st_values))
@@ -1258,7 +1273,8 @@
if self.space.reference:
return (
self.st_setitem() | sampled_from(global_actions) |
- self.st_updateitem() | self.st_delitem())
+ self.st_updateitem() | self.st_delitem() |
+ self.st_move_to_end() | self.st_move_to_first())
else:
return (self.st_setitem() | sampled_from(global_actions))
diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py
--- a/rpython/rtyper/test/test_rordereddict.py
+++ b/rpython/rtyper/test/test_rordereddict.py
@@ -1,7 +1,8 @@
import py
+import random
from collections import OrderedDict
-from hypothesis import settings
+from hypothesis import settings, given, strategies
from hypothesis.stateful import run_state_machine_as_test
from rpython.rtyper.lltypesystem import lltype, rffi
@@ -145,14 +146,18 @@
ll_d = rordereddict.ll_newdict(DICT)
rordereddict.ll_dict_setitem(ll_d, llstr("k"), 1)
rordereddict.ll_dict_setitem(ll_d, llstr("j"), 2)
- ITER = rordereddict.get_ll_dictiter(lltype.Ptr(DICT))
+ assert [hlstr(entry.key) for entry in self._ll_iter(ll_d)] == ["k", "j"]
+
+ def _ll_iter(self, ll_d):
+ ITER = rordereddict.get_ll_dictiter(lltype.typeOf(ll_d))
ll_iter = rordereddict.ll_dictiter(ITER, ll_d)
ll_dictnext = rordereddict._ll_dictnext
- num = ll_dictnext(ll_iter)
- assert hlstr(ll_d.entries[num].key) == "k"
- num = ll_dictnext(ll_iter)
- assert hlstr(ll_d.entries[num].key) == "j"
- py.test.raises(StopIteration, ll_dictnext, ll_iter)
+ while True:
+ try:
+ num = ll_dictnext(ll_iter)
+ except StopIteration:
+ break
+ yield ll_d.entries[num]
def test_popitem(self):
DICT = self._get_str_dict()
@@ -337,6 +342,31 @@
num_nonfrees += (got > 0)
assert d.resize_counter <= idx.getlength() * 2 - num_nonfrees * 3
+ @given(strategies.lists(strategies.integers(min_value=1, max_value=5)))
+ def test_direct_move_to_end(self, lst):
+ DICT = self._get_int_dict()
+ ll_d = rordereddict.ll_newdict(DICT)
+ rordereddict.ll_dict_setitem(ll_d, 1, 11)
+ rordereddict.ll_dict_setitem(ll_d, 2, 22)
+ def content():
+ return [(entry.key, entry.value) for entry in self._ll_iter(ll_d)]
+ for case in lst:
+ if case == 1:
+ rordereddict.ll_dict_move_to_end(ll_d, 1, True)
+ assert content() == [(2, 22), (1, 11)]
+ elif case == 2:
+ rordereddict.ll_dict_move_to_end(ll_d, 2, True)
+ assert content() == [(1, 11), (2, 22)]
+ elif case == 3:
+ py.test.raises(KeyError, rordereddict.ll_dict_move_to_end,
+ ll_d, 3, True)
+ elif case == 4:
+ rordereddict.ll_dict_move_to_end(ll_d, 2, False)
+ assert content() == [(2, 22), (1, 11)]
+ elif case == 5:
+ rordereddict.ll_dict_move_to_end(ll_d, 1, False)
+ assert content() == [(1, 11), (2, 22)]
+
class TestRDictDirectDummyKey(TestRDictDirect):
class dummykeyobj:
@@ -369,10 +399,29 @@
res = self.interpret(func, [5])
assert res == 6
+ def test_move_to_end(self):
+ def func():
+ d1 = OrderedDict()
+ d1['key1'] = 'value1'
+ d1['key2'] = 'value2'
+ for i in range(20):
+ objectmodel.move_to_end(d1, 'key1')
+ assert d1.keys() == ['key2', 'key1']
+ objectmodel.move_to_end(d1, 'key2')
+ assert d1.keys() == ['key1', 'key2']
+ for i in range(20):
+ objectmodel.move_to_end(d1, 'key2', last=False)
+ assert d1.keys() == ['key2', 'key1']
+ objectmodel.move_to_end(d1, 'key1', last=False)
+ assert d1.keys() == ['key1', 'key2']
+ func()
+ self.interpret(func, [])
+
class ODictSpace(MappingSpace):
MappingRepr = rodct.OrderedDictRepr
new_reference = OrderedDict
+ moved_around = False
ll_getitem = staticmethod(rodct.ll_dict_getitem)
ll_setitem = staticmethod(rodct.ll_dict_setitem)
ll_delitem = staticmethod(rodct.ll_dict_delitem)
@@ -417,9 +466,23 @@
def removeindex(self):
# remove the index, as done during translation for prebuilt dicts
# (but cannot be done if we already removed a key)
- if not self.removed_keys:
+ if not self.removed_keys and not self.moved_around:
rodct.ll_no_initial_index(self.l_dict)
+ def move_to_end(self, key, last=True):
+ ll_key = self.ll_key(key)
+ rodct.ll_dict_move_to_end(self.l_dict, ll_key, last)
+ value = self.reference.pop(key)
+ if last:
+ self.reference[key] = value
+ else:
+ items = self.reference.items()
+ self.reference.clear()
+ self.reference[key] = value
+ self.reference.update(items)
+ # prevent ll_no_initial_index()
+ self.moved_around = True
+
def fullcheck(self):
# overridden to also check key order
assert self.ll_len(self.l_dict) == len(self.reference)
More information about the pypy-commit
mailing list