Skip to content

Commit cd53ce8

Browse files
committed
Track HashTableIterators for copy-on-write copies of HashTables
When executing a foreach ($ht as &$ref), foreach calls zend_hash_iterator_pos_ex() on every iteration. If the HashTable contained in the $ht variable is not the tracked HashTable, it will reset the position to the internal array pointer of the array currently in $ht. This behaviour is generally fine, but undesirable for copy-on-write copies of the iterated HashTable. This may trivially occur when the iterated over HashTable is assigned to some variable, then the iterated over variable modified, leading to array separation, changing the HashTable pointer in the variable. Thus foreach happily restarting iteration. This behaviour (despite existing since PHP 7.0) is considered a bug, if not only for the behaviour being unexpected to the user, also copy-on-write should not have trivially observable side-effects by mere assignment. The bugfix consists of duplicating HashTableIterators whenever zend_array_dup() is called (the primitive used on array separation). When a further access to the HashPosition through the HashTableIterators API happens and the HashTable does not match the tracked one, all the duplicates (which are tracked by single linked list) are searched for the wanted HashTable. If found, the HashTableIterator is replaced by the found copy and all other copies are removed. This ensures that we always end up tracking the correct HashTable. Fixes GH-11244. Signed-off-by: Bob Weinand <[email protected]>
1 parent 26d6bb3 commit cd53ce8

8 files changed

+241
-9
lines changed

Zend/tests/gh11222.phpt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
--TEST--
2-
GH-112222: foreach by-ref may jump over keys during a rehash
2+
GH-11222: foreach by-ref may jump over keys during a rehash
33
--FILE--
44
<?php
55

Zend/tests/gh11244-001.phpt

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
--TEST--
2+
GH-11244: Modifying a copied by-ref iterated array resets the array position (packed)
3+
--FILE--
4+
<?php
5+
6+
$data = [0, 1, 2];
7+
8+
foreach ($data as $key => &$value) {
9+
echo "$value\n";
10+
if ($value === 1) {
11+
$cow_copy = $data;
12+
echo "unset $value\n";
13+
unset($data[$key]);
14+
}
15+
}
16+
17+
?>
18+
--EXPECTF--
19+
0
20+
1
21+
unset 1
22+
2

Zend/tests/gh11244-002.phpt

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
--TEST--
2+
GH-11244: Modifying a copied by-ref iterated array resets the array position (not packed)
3+
--FILE--
4+
<?php
5+
6+
$data = ["k" => 0, 1, 2];
7+
8+
foreach ($data as $key => &$value) {
9+
echo "$value\n";
10+
if ($value === 1) {
11+
$cow_copy = $data;
12+
echo "unset $value\n";
13+
unset($data[$key]);
14+
}
15+
}
16+
17+
?>
18+
--EXPECTF--
19+
0
20+
1
21+
unset 1
22+
2

Zend/tests/gh11244-003.phpt

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
--TEST--
2+
GH-11244: Modifying a copied by-ref iterated array resets the array position (not packed with holes)
3+
--FILE--
4+
<?php
5+
6+
$data = ["k" => 0, 1, 2, 3];
7+
unset($data[1]);
8+
9+
foreach ($data as $key => &$value) {
10+
echo "$value\n";
11+
if ($value === 1) {
12+
$cow_copy = $data;
13+
echo "unset $value\n";
14+
unset($data[$key]);
15+
}
16+
}
17+
18+
?>
19+
--EXPECTF--
20+
0
21+
1
22+
unset 1
23+
3

Zend/tests/gh11244-004.phpt

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
--TEST--
2+
GH-11244: Modifying a copied by-ref iterated array resets the array position (with object)
3+
--FILE--
4+
<?php
5+
6+
$obj = (object)[1,2,3];
7+
8+
foreach ($obj as $p => $v) {
9+
echo "$p : $v\n";
10+
$clone = clone $obj;
11+
$ref = &$obj->$p;
12+
}
13+
14+
?>
15+
--EXPECTF--
16+
0 : 1
17+
1 : 2
18+
2 : 3

Zend/tests/gh11244-005.phpt

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
--TEST--
2+
GH-11244: Modifying a copied by-ref iterated array resets the array position (multiple copies)
3+
--FILE--
4+
<?php
5+
6+
$data = [0, 1, 2];
7+
8+
foreach ($data as $key => &$value) {
9+
echo "$value\n";
10+
if ($value === 1) {
11+
$cow_copy = [$data, $data, $data];
12+
echo "unset $value\n";
13+
unset($cow_copy[0][$key]);
14+
unset($data[$key]);
15+
unset($cow_copy[2][$key]);
16+
}
17+
}
18+
19+
print_r($cow_copy);
20+
21+
?>
22+
--EXPECTF--
23+
0
24+
1
25+
unset 1
26+
2
27+
Array
28+
(
29+
[0] => Array
30+
(
31+
[0] => 0
32+
[2] => 2
33+
)
34+
35+
[1] => Array
36+
(
37+
[0] => 0
38+
[1] => 1
39+
[2] => 2
40+
)
41+
42+
[2] => Array
43+
(
44+
[0] => 0
45+
[2] => 2
46+
)
47+
48+
)

Zend/zend_hash.c

+106-8
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,21 @@ ZEND_API HashPosition ZEND_FASTCALL zend_hash_get_current_pos(const HashTable *h
514514
return _zend_hash_get_current_pos(ht);
515515
}
516516

517+
static void zend_hash_remove_iterator_copies(uint32_t idx) {
518+
HashTableIterator *iterators = EG(ht_iterators);
519+
520+
HashTableIterator *iter = iterators + idx;
521+
uint32_t next_idx = iter->next_copy;
522+
while (next_idx != idx) {
523+
uint32_t cur_idx = next_idx;
524+
HashTableIterator *cur_iter = iterators + cur_idx;
525+
next_idx = cur_iter->next_copy;
526+
cur_iter->next_copy = cur_idx; // avoid recursion in zend_hash_iterator_del
527+
zend_hash_iterator_del(cur_idx);
528+
}
529+
iter->next_copy = idx;
530+
}
531+
517532
ZEND_API uint32_t ZEND_FASTCALL zend_hash_iterator_add(HashTable *ht, HashPosition pos)
518533
{
519534
HashTableIterator *iter = EG(ht_iterators);
@@ -528,6 +543,7 @@ ZEND_API uint32_t ZEND_FASTCALL zend_hash_iterator_add(HashTable *ht, HashPositi
528543
iter->ht = ht;
529544
iter->pos = pos;
530545
idx = iter - EG(ht_iterators);
546+
iter->next_copy = idx;
531547
if (idx + 1 > EG(ht_iterators_used)) {
532548
EG(ht_iterators_used) = idx + 1;
533549
}
@@ -547,16 +563,49 @@ ZEND_API uint32_t ZEND_FASTCALL zend_hash_iterator_add(HashTable *ht, HashPositi
547563
iter->pos = pos;
548564
memset(iter + 1, 0, sizeof(HashTableIterator) * 7);
549565
idx = iter - EG(ht_iterators);
566+
iter->next_copy = idx;
550567
EG(ht_iterators_used) = idx + 1;
551568
return idx;
552569
}
553570

571+
// To avoid losing track of the HashTable when separating arrays, we track all copies at once.
572+
static zend_always_inline bool zend_hash_iterator_find_copy_pos(uint32_t idx, HashTable *ht) {
573+
HashTableIterator *iter = EG(ht_iterators) + idx;
574+
575+
uint32_t next_idx = iter->next_copy;
576+
if (EXPECTED(next_idx != idx)) {
577+
HashTableIterator *copy_iter;
578+
while (next_idx != idx) {
579+
copy_iter = EG(ht_iterators) + next_idx;
580+
if (copy_iter->ht == ht) {
581+
// We have found the hashtable we are actually iterating over
582+
// Now clean any intermittent copies and replace the original index by the found one
583+
if (EXPECTED(iter->ht) && EXPECTED(iter->ht != HT_POISONED_PTR)
584+
&& EXPECTED(!HT_ITERATORS_OVERFLOW(iter->ht))) {
585+
HT_DEC_ITERATORS_COUNT(iter->ht);
586+
}
587+
if (EXPECTED(!HT_ITERATORS_OVERFLOW(ht))) {
588+
HT_INC_ITERATORS_COUNT(ht);
589+
}
590+
iter->ht = copy_iter->ht;
591+
iter->pos = copy_iter->pos;
592+
zend_hash_remove_iterator_copies(idx);
593+
return true;
594+
}
595+
next_idx = copy_iter->next_copy;
596+
}
597+
zend_hash_remove_iterator_copies(idx);
598+
}
599+
600+
return false;
601+
}
602+
554603
ZEND_API HashPosition ZEND_FASTCALL zend_hash_iterator_pos(uint32_t idx, HashTable *ht)
555604
{
556605
HashTableIterator *iter = EG(ht_iterators) + idx;
557606

558607
ZEND_ASSERT(idx != (uint32_t)-1);
559-
if (UNEXPECTED(iter->ht != ht)) {
608+
if (UNEXPECTED(iter->ht != ht) && !zend_hash_iterator_find_copy_pos(idx, ht)) {
560609
if (EXPECTED(iter->ht) && EXPECTED(iter->ht != HT_POISONED_PTR)
561610
&& EXPECTED(!HT_ITERATORS_OVERFLOW(iter->ht))) {
562611
HT_DEC_ITERATORS_COUNT(iter->ht);
@@ -576,7 +625,7 @@ ZEND_API HashPosition ZEND_FASTCALL zend_hash_iterator_pos_ex(uint32_t idx, zval
576625
HashTableIterator *iter = EG(ht_iterators) + idx;
577626

578627
ZEND_ASSERT(idx != (uint32_t)-1);
579-
if (UNEXPECTED(iter->ht != ht)) {
628+
if (UNEXPECTED(iter->ht != ht) && !zend_hash_iterator_find_copy_pos(idx, ht)) {
580629
if (EXPECTED(iter->ht) && EXPECTED(iter->ht != HT_POISONED_PTR)
581630
&& EXPECTED(!HT_ITERATORS_OVERFLOW(ht))) {
582631
HT_DEC_ITERATORS_COUNT(iter->ht);
@@ -605,6 +654,10 @@ ZEND_API void ZEND_FASTCALL zend_hash_iterator_del(uint32_t idx)
605654
}
606655
iter->ht = NULL;
607656

657+
if (UNEXPECTED(iter->next_copy != idx)) {
658+
zend_hash_remove_iterator_copies(idx);
659+
}
660+
608661
if (idx == EG(ht_iterators_used) - 1) {
609662
while (idx > 0 && EG(ht_iterators)[idx - 1].ht == NULL) {
610663
idx--;
@@ -2291,6 +2344,22 @@ static zend_always_inline bool zend_array_dup_element(HashTable *source, HashTab
22912344
return 1;
22922345
}
22932346

2347+
// We need to duplicate iterators to be able to search through all copy-on-write copies to find the actually iterated HashTable and position back
2348+
static void zend_array_dup_ht_iterators(HashTable *source, HashTable *target) {
2349+
HashTableIterator *iter = EG(ht_iterators);
2350+
HashTableIterator *end = iter + EG(ht_iterators_used);
2351+
2352+
while (iter != end) {
2353+
if (iter->ht == source) {
2354+
uint32_t copy_idx = zend_hash_iterator_add(target, iter->pos);
2355+
HashTableIterator *copy_iter = EG(ht_iterators) + copy_idx;
2356+
copy_iter->next_copy = iter->next_copy;
2357+
iter->next_copy = copy_idx;
2358+
}
2359+
iter++;
2360+
}
2361+
}
2362+
22942363
static zend_always_inline void zend_array_dup_packed_elements(HashTable *source, HashTable *target, bool with_holes)
22952364
{
22962365
zval *p = source->arPacked;
@@ -2305,6 +2374,10 @@ static zend_always_inline void zend_array_dup_packed_elements(HashTable *source,
23052374
}
23062375
p++; q++;
23072376
} while (p != end);
2377+
2378+
if (UNEXPECTED(HT_HAS_ITERATORS(source))) {
2379+
zend_array_dup_ht_iterators(source, target);
2380+
}
23082381
}
23092382

23102383
static zend_always_inline uint32_t zend_array_dup_elements(HashTable *source, HashTable *target, bool static_keys, bool with_holes)
@@ -2314,19 +2387,44 @@ static zend_always_inline uint32_t zend_array_dup_elements(HashTable *source, Ha
23142387
Bucket *q = target->arData;
23152388
Bucket *end = p + source->nNumUsed;
23162389

2390+
if (UNEXPECTED(HT_HAS_ITERATORS(source))) {
2391+
zend_array_dup_ht_iterators(source, target);
2392+
}
2393+
23172394
do {
23182395
if (!zend_array_dup_element(source, target, idx, p, q, 0, static_keys, with_holes)) {
23192396
uint32_t target_idx = idx;
23202397

23212398
idx++; p++;
2322-
while (p != end) {
2323-
if (zend_array_dup_element(source, target, target_idx, p, q, 0, static_keys, with_holes)) {
2324-
if (source->nInternalPointer == idx) {
2325-
target->nInternalPointer = target_idx;
2399+
if (EXPECTED(!HT_HAS_ITERATORS(target))) {
2400+
while (p != end) {
2401+
if (zend_array_dup_element(source, target, target_idx, p, q, 0, static_keys, with_holes)) {
2402+
if (source->nInternalPointer == idx) {
2403+
target->nInternalPointer = target_idx;
2404+
}
2405+
target_idx++; q++;
2406+
}
2407+
idx++; p++;
2408+
}
2409+
} else {
2410+
target->nNumUsed = source->nNumOfElements;
2411+
uint32_t iter_pos = zend_hash_iterators_lower_pos(target, idx);
2412+
2413+
while (p != end) {
2414+
if (zend_array_dup_element(source, target, target_idx, p, q, 0, static_keys, with_holes)) {
2415+
if (source->nInternalPointer == idx) {
2416+
target->nInternalPointer = target_idx;
2417+
}
2418+
if (UNEXPECTED(idx >= iter_pos)) {
2419+
do {
2420+
zend_hash_iterators_update(target, iter_pos, target_idx);
2421+
iter_pos = zend_hash_iterators_lower_pos(target, iter_pos + 1);
2422+
} while (iter_pos < idx);
2423+
}
2424+
target_idx++; q++;
23262425
}
2327-
target_idx++; q++;
2426+
idx++; p++;
23282427
}
2329-
idx++; p++;
23302428
}
23312429
return target_idx;
23322430
}

Zend/zend_types.h

+1
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,7 @@ typedef uint32_t HashPosition;
550550
typedef struct _HashTableIterator {
551551
HashTable *ht;
552552
HashPosition pos;
553+
uint32_t next_copy;
553554
} HashTableIterator;
554555

555556
struct _zend_object {

0 commit comments

Comments
 (0)