Skip to content

Commit b827ef0

Browse files
author
Claudio Imbrenda
committed
KVM: s390: Remove non-atomic dat_crstep_xchg()
In practice dat_crstep_xchg() is racy and hard to use correctly. Simply remove it and replace its uses with dat_crstep_xchg_atomic(). This solves some actual races that lead to system hangs / crashes. Opportunistically fix an alignment issue in _gmap_crstep_xchg_atomic(). Fixes: 589071e ("KVM: s390: KVM page table management functions: clear and replace") Fixes: 94fd9b1 ("KVM: s390: KVM page table management functions: lifecycle management") Reviewed-by: Steffen Eiden <seiden@linux.ibm.com> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
1 parent 0f54755 commit b827ef0

5 files changed

Lines changed: 100 additions & 97 deletions

File tree

arch/s390/kvm/dat.c

Lines changed: 14 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -134,32 +134,6 @@ int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newt
134134
return 0;
135135
}
136136

137-
/**
138-
* dat_crstep_xchg() - Exchange a gmap CRSTE with another.
139-
* @crstep: Pointer to the CRST entry
140-
* @new: Replacement entry.
141-
* @gfn: The affected guest address.
142-
* @asce: The ASCE of the address space.
143-
*
144-
* Context: This function is assumed to be called with kvm->mmu_lock held.
145-
*/
146-
void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce)
147-
{
148-
if (crstep->h.i) {
149-
WRITE_ONCE(*crstep, new);
150-
return;
151-
} else if (cpu_has_edat2()) {
152-
crdte_crste(crstep, *crstep, new, gfn, asce);
153-
return;
154-
}
155-
156-
if (machine_has_tlb_guest())
157-
idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL);
158-
else
159-
idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL);
160-
WRITE_ONCE(*crstep, new);
161-
}
162-
163137
/**
164138
* dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
165139
* @crstep: Pointer to the CRST entry.
@@ -175,8 +149,8 @@ void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce
175149
*
176150
* Return: %true if the exchange was successful.
177151
*/
178-
bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
179-
union asce asce)
152+
bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new,
153+
gfn_t gfn, union asce asce)
180154
{
181155
if (old.h.i)
182156
return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
@@ -894,7 +868,8 @@ static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct d
894868

895869
/* This table entry needs to be updated. */
896870
if (walk->start <= gfn && walk->end >= next) {
897-
dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce);
871+
if (!dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce))
872+
return -EINVAL;
898873
/* A lower level table was present, needs to be freed. */
899874
if (!crste.h.fc && !crste.h.i) {
900875
if (is_pmd(crste))
@@ -1072,17 +1047,19 @@ int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
10721047

10731048
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
10741049
{
1075-
union crste crste = READ_ONCE(*crstep);
1050+
union crste newcrste, oldcrste;
10761051
int *n = walk->priv;
10771052

1078-
if (!crste.h.fc || crste.h.i || crste.h.p)
1079-
return 0;
1080-
1053+
do {
1054+
oldcrste = READ_ONCE(*crstep);
1055+
if (!oldcrste.h.fc || oldcrste.h.i || oldcrste.h.p)
1056+
return 0;
1057+
if (oldcrste.s.fc1.prefix_notif)
1058+
break;
1059+
newcrste = oldcrste;
1060+
newcrste.s.fc1.prefix_notif = 1;
1061+
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, walk->asce));
10811062
*n = 2;
1082-
if (crste.s.fc1.prefix_notif)
1083-
return 0;
1084-
crste.s.fc1.prefix_notif = 1;
1085-
dat_crstep_xchg(crstep, crste, gfn, walk->asce);
10861063
return 0;
10871064
}
10881065

arch/s390/kvm/dat.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -938,11 +938,14 @@ static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pu
938938
return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
939939
}
940940

941-
static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce)
941+
static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
942942
{
943-
union crste newcrste = _CRSTE_EMPTY(crstep->h.tt);
943+
union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt);
944944

945-
dat_crstep_xchg(crstep, newcrste, gfn, asce);
945+
do {
946+
oldcrste = READ_ONCE(*crstep);
947+
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce));
948+
return oldcrste;
946949
}
947950

948951
static inline int get_level(union crste *crstep, union pte *ptep)

arch/s390/kvm/gaccess.c

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1456,7 +1456,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
14561456
static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
14571457
struct guest_fault *f, bool p)
14581458
{
1459-
union crste newcrste;
1459+
union crste newcrste, oldcrste;
14601460
gfn_t gfn;
14611461
int rc;
14621462

@@ -1469,16 +1469,20 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
14691469
if (rc)
14701470
return rc;
14711471

1472-
newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p);
1473-
newcrste.s.fc1.d |= host->s.fc1.d;
1474-
newcrste.s.fc1.sd |= host->s.fc1.sd;
1475-
newcrste.h.p &= host->h.p;
1476-
newcrste.s.fc1.vsie_notif = 1;
1477-
newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif;
1478-
_gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false);
1479-
1480-
newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p);
1481-
dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce);
1472+
do {
1473+
oldcrste = READ_ONCE(*host);
1474+
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p);
1475+
newcrste.s.fc1.d |= oldcrste.s.fc1.d;
1476+
newcrste.s.fc1.sd |= oldcrste.s.fc1.sd;
1477+
newcrste.h.p &= oldcrste.h.p;
1478+
newcrste.s.fc1.vsie_notif = 1;
1479+
newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif;
1480+
} while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false));
1481+
1482+
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
1483+
gfn = gpa_to_gfn(raddr);
1484+
while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
1485+
;
14821486
return 0;
14831487
}
14841488

arch/s390/kvm/gmap.c

Lines changed: 48 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -313,13 +313,16 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
313313
struct clear_young_pte_priv *priv = walk->priv;
314314
union crste crste, new;
315315

316-
crste = READ_ONCE(*crstep);
316+
do {
317+
crste = READ_ONCE(*crstep);
318+
319+
if (!crste.h.fc)
320+
return 0;
321+
if (!crste.s.fc1.y && crste.h.i)
322+
return 0;
323+
if (crste_prefix(crste) && !gmap_mkold_prefix(priv->gmap, gfn, end))
324+
break;
317325

318-
if (!crste.h.fc)
319-
return 0;
320-
if (!crste.s.fc1.y && crste.h.i)
321-
return 0;
322-
if (!crste_prefix(crste) || gmap_mkold_prefix(priv->gmap, gfn, end)) {
323326
new = crste;
324327
new.h.i = 1;
325328
new.s.fc1.y = 0;
@@ -328,8 +331,8 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
328331
folio_set_dirty(phys_to_folio(crste_origin_large(crste)));
329332
new.s.fc1.d = 0;
330333
new.h.p = 1;
331-
dat_crstep_xchg(crstep, new, gfn, walk->asce);
332-
}
334+
} while (!dat_crstep_xchg_atomic(crstep, crste, new, gfn, walk->asce));
335+
333336
priv->young = 1;
334337
return 0;
335338
}
@@ -391,14 +394,18 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct
391394
{
392395
struct gmap_unmap_priv *priv = walk->priv;
393396
struct folio *folio = NULL;
397+
union crste old = *crstep;
394398

395-
if (crstep->h.fc) {
396-
if (crstep->s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
397-
folio = phys_to_folio(crste_origin_large(*crstep));
398-
gmap_crstep_xchg(priv->gmap, crstep, _CRSTE_EMPTY(crstep->h.tt), gfn);
399-
if (folio)
400-
uv_convert_from_secure_folio(folio);
401-
}
399+
if (!old.h.fc)
400+
return 0;
401+
402+
if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
403+
folio = phys_to_folio(crste_origin_large(old));
404+
/* No races should happen because kvm->mmu_lock is held in write mode */
405+
KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn),
406+
priv->gmap->kvm);
407+
if (folio)
408+
uv_convert_from_secure_folio(folio);
402409

403410
return 0;
404411
}
@@ -474,23 +481,24 @@ static long _crste_test_and_clear_softdirty(union crste *table, gfn_t gfn, gfn_t
474481

475482
if (fatal_signal_pending(current))
476483
return 1;
477-
crste = READ_ONCE(*table);
478-
if (!crste.h.fc)
479-
return 0;
480-
if (crste.h.p && !crste.s.fc1.sd)
481-
return 0;
484+
do {
485+
crste = READ_ONCE(*table);
486+
if (!crste.h.fc)
487+
return 0;
488+
if (crste.h.p && !crste.s.fc1.sd)
489+
return 0;
482490

483-
/*
484-
* If this large page contains one or more prefixes of vCPUs that are
485-
* currently running, do not reset the protection, leave it marked as
486-
* dirty.
487-
*/
488-
if (!crste.s.fc1.prefix_notif || gmap_mkold_prefix(gmap, gfn, end)) {
491+
/*
492+
* If this large page contains one or more prefixes of vCPUs that are
493+
* currently running, do not reset the protection, leave it marked as
494+
* dirty.
495+
*/
496+
if (crste.s.fc1.prefix_notif && !gmap_mkold_prefix(gmap, gfn, end))
497+
break;
489498
new = crste;
490499
new.h.p = 1;
491500
new.s.fc1.sd = 0;
492-
gmap_crstep_xchg(gmap, table, new, gfn);
493-
}
501+
} while (!gmap_crstep_xchg_atomic(gmap, table, crste, new, gfn));
494502

495503
for ( ; gfn < end; gfn++)
496504
mark_page_dirty(gmap->kvm, gfn);
@@ -646,8 +654,8 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau
646654
static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
647655
gfn_t p_gfn, gfn_t c_gfn, bool force_alloc)
648656
{
657+
union crste newcrste, oldcrste;
649658
struct page_table *pt;
650-
union crste newcrste;
651659
union crste *crstep;
652660
union pte *ptep;
653661
int rc;
@@ -673,7 +681,11 @@ static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
673681
&crstep, &ptep);
674682
if (rc)
675683
return rc;
676-
dat_crstep_xchg(crstep, newcrste, c_gfn, gmap->asce);
684+
do {
685+
oldcrste = READ_ONCE(*crstep);
686+
if (oldcrste.val == newcrste.val)
687+
break;
688+
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, c_gfn, gmap->asce));
677689
return 0;
678690
}
679691

@@ -777,8 +789,10 @@ static void gmap_ucas_unmap_one(struct gmap *gmap, gfn_t c_gfn)
777789
int rc;
778790

779791
rc = dat_entry_walk(NULL, c_gfn, gmap->asce, 0, TABLE_TYPE_SEGMENT, &crstep, &ptep);
780-
if (!rc)
781-
dat_crstep_xchg(crstep, _PMD_EMPTY, c_gfn, gmap->asce);
792+
if (rc)
793+
return;
794+
while (!dat_crstep_xchg_atomic(crstep, READ_ONCE(*crstep), _PMD_EMPTY, c_gfn, gmap->asce))
795+
;
782796
}
783797

784798
void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count)
@@ -1017,8 +1031,8 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level)
10171031
dat_ptep_xchg(ptep, _PTE_EMPTY, r_gfn, sg->asce, uses_skeys(sg));
10181032
return;
10191033
}
1020-
crste = READ_ONCE(*crstep);
1021-
dat_crstep_clear(crstep, r_gfn, sg->asce);
1034+
1035+
crste = dat_crstep_clear_atomic(crstep, r_gfn, sg->asce);
10221036
if (crste_leaf(crste) || crste.h.i)
10231037
return;
10241038
if (is_pmd(crste))

arch/s390/kvm/gmap.h

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -194,35 +194,40 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
194194
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
195195
}
196196

197-
static inline void _gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
198-
gfn_t gfn, bool needs_lock)
197+
static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
198+
union crste oldcrste, union crste newcrste,
199+
gfn_t gfn, bool needs_lock)
199200
{
200-
unsigned long align = 8 + (is_pmd(*crstep) ? 0 : 11);
201+
unsigned long align = is_pmd(newcrste) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES;
202+
203+
if (KVM_BUG_ON(crstep->h.tt != oldcrste.h.tt || newcrste.h.tt != oldcrste.h.tt, gmap->kvm))
204+
return true;
201205

202206
lockdep_assert_held(&gmap->kvm->mmu_lock);
203207
if (!needs_lock)
204208
lockdep_assert_held(&gmap->children_lock);
205209

206210
gfn = ALIGN_DOWN(gfn, align);
207-
if (crste_prefix(*crstep) && (ne.h.p || ne.h.i || !crste_prefix(ne))) {
208-
ne.s.fc1.prefix_notif = 0;
211+
if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) {
212+
newcrste.s.fc1.prefix_notif = 0;
209213
gmap_unmap_prefix(gmap, gfn, gfn + align);
210214
}
211-
if (crste_leaf(*crstep) && crstep->s.fc1.vsie_notif &&
212-
(ne.h.p || ne.h.i || !ne.s.fc1.vsie_notif)) {
213-
ne.s.fc1.vsie_notif = 0;
215+
if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
216+
(newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
217+
newcrste.s.fc1.vsie_notif = 0;
214218
if (needs_lock)
215219
gmap_handle_vsie_unshadow_event(gmap, gfn);
216220
else
217221
_gmap_handle_vsie_unshadow_event(gmap, gfn);
218222
}
219-
dat_crstep_xchg(crstep, ne, gfn, gmap->asce);
223+
return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce);
220224
}
221225

222-
static inline void gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
223-
gfn_t gfn)
226+
static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
227+
union crste oldcrste, union crste newcrste,
228+
gfn_t gfn)
224229
{
225-
return _gmap_crstep_xchg(gmap, crstep, ne, gfn, true);
230+
return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true);
226231
}
227232

228233
/**

0 commit comments

Comments
 (0)