Skip to content

Commit dce9ce3

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Radim Krčmář:

 "ARM:
   - Fix handling of the 32bit cycle counter
   - Fix cycle counter filtering

  x86:
   - Fix a race leading to double unregistering of user notifiers
   - Amend oversight in kvm_arch_set_irq that turned Hyper-V code dead
   - Use SRCU around kvm_lapic_set_vapic_addr
   - Avoid recursive flushing of asynchronous page faults
   - Do not rely on deferred update in KVM_GET_CLOCK, which fixes #GP
   - Let userspace know that KVM_GET_CLOCK is useful with master clock;
     4.9 changed the return value to better match the guest clock, but
     didn't provide means to let guests take advantage of it"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: x86: merge kvm_arch_set_irq and kvm_arch_set_irq_inatomic
  KVM: x86: fix missed SRCU usage in kvm_lapic_set_vapic_addr
  KVM: async_pf: avoid recursive flushing of work items
  kvm: kvmclock: let KVM_GET_CLOCK return whether the master clock is in use
  KVM: Disable irq while unregistering user notifier
  KVM: x86: do not go through vcpu in __get_kvmclock_ns
  KVM: arm64: Fix the issues when guest PMCCFILTR is configured
  arm64: KVM: pmu: Fix AArch32 cycle counter access
2 parents f691838 + a2b0773 commit dce9ce3

9 files changed

Lines changed: 114 additions & 60 deletions

File tree

Documentation/virtual/kvm/api.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,17 @@ Gets the current timestamp of kvmclock as seen by the current guest. In
777777
conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
778778
such as migration.
779779

780+
When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
781+
set of bits that KVM can return in struct kvm_clock_data's flag member.
782+
783+
The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned
784+
value is the exact kvmclock value seen by all VCPUs at the instant
785+
when KVM_GET_CLOCK was called. If clear, the returned value is simply
786+
CLOCK_MONOTONIC plus a constant offset; the offset can be modified
787+
with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock,
788+
but the exact value read by each VCPU could differ, because the host
789+
TSC is not stable.
790+
780791
struct kvm_clock_data {
781792
__u64 clock; /* kvmclock current value */
782793
__u32 flags;

arch/arm64/include/asm/perf_event.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,15 @@
4646
#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
4747
#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
4848

49-
#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */
49+
/*
50+
* PMUv3 event types: required events
51+
*/
52+
#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
53+
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
54+
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
55+
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
56+
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
57+
#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
5058

5159
/*
5260
* Event filters for PMUv3

arch/arm64/kernel/perf_event.c

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,9 @@
3131

3232
/*
3333
* ARMv8 PMUv3 Performance Events handling code.
34-
* Common event types.
34+
* Common event types (some are defined in asm/perf_event.h).
3535
*/
3636

37-
/* Required events. */
38-
#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
39-
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
40-
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
41-
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
42-
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
43-
#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
44-
4537
/* At least one of the following is required. */
4638
#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
4739
#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B

arch/arm64/kvm/sys_regs.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,16 +597,22 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
597597

598598
idx = ARMV8_PMU_CYCLE_IDX;
599599
} else {
600-
BUG();
600+
return false;
601601
}
602+
} else if (r->CRn == 0 && r->CRm == 9) {
603+
/* PMCCNTR */
604+
if (pmu_access_event_counter_el0_disabled(vcpu))
605+
return false;
606+
607+
idx = ARMV8_PMU_CYCLE_IDX;
602608
} else if (r->CRn == 14 && (r->CRm & 12) == 8) {
603609
/* PMEVCNTRn_EL0 */
604610
if (pmu_access_event_counter_el0_disabled(vcpu))
605611
return false;
606612

607613
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
608614
} else {
609-
BUG();
615+
return false;
610616
}
611617

612618
if (!pmu_counter_idx_valid(vcpu, idx))

arch/x86/kvm/irq_comm.c

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -156,25 +156,43 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
156156
}
157157

158158

159+
static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
160+
struct kvm *kvm, int irq_source_id, int level,
161+
bool line_status)
162+
{
163+
if (!level)
164+
return -1;
165+
166+
return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
167+
}
168+
159169
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
160170
struct kvm *kvm, int irq_source_id, int level,
161171
bool line_status)
162172
{
163173
struct kvm_lapic_irq irq;
164174
int r;
165175

166-
if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
167-
return -EWOULDBLOCK;
176+
switch (e->type) {
177+
case KVM_IRQ_ROUTING_HV_SINT:
178+
return kvm_hv_set_sint(e, kvm, irq_source_id, level,
179+
line_status);
168180

169-
if (kvm_msi_route_invalid(kvm, e))
170-
return -EINVAL;
181+
case KVM_IRQ_ROUTING_MSI:
182+
if (kvm_msi_route_invalid(kvm, e))
183+
return -EINVAL;
171184

172-
kvm_set_msi_irq(kvm, e, &irq);
185+
kvm_set_msi_irq(kvm, e, &irq);
173186

174-
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
175-
return r;
176-
else
177-
return -EWOULDBLOCK;
187+
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
188+
return r;
189+
break;
190+
191+
default:
192+
break;
193+
}
194+
195+
return -EWOULDBLOCK;
178196
}
179197

180198
int kvm_request_irq_source_id(struct kvm *kvm)
@@ -254,16 +272,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
254272
srcu_read_unlock(&kvm->irq_srcu, idx);
255273
}
256274

257-
static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
258-
struct kvm *kvm, int irq_source_id, int level,
259-
bool line_status)
260-
{
261-
if (!level)
262-
return -1;
263-
264-
return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
265-
}
266-
267275
int kvm_set_routing_entry(struct kvm *kvm,
268276
struct kvm_kernel_irq_routing_entry *e,
269277
const struct kvm_irq_routing_entry *ue)
@@ -423,18 +431,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
423431
srcu_read_unlock(&kvm->irq_srcu, idx);
424432
}
425433

426-
int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
427-
int irq_source_id, int level, bool line_status)
428-
{
429-
switch (irq->type) {
430-
case KVM_IRQ_ROUTING_HV_SINT:
431-
return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
432-
line_status);
433-
default:
434-
return -EWOULDBLOCK;
435-
}
436-
}
437-
438434
void kvm_arch_irq_routing_update(struct kvm *kvm)
439435
{
440436
kvm_hv_irq_routing_update(kvm);

arch/x86/kvm/x86.c

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -210,16 +210,25 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
210210
struct kvm_shared_msrs *locals
211211
= container_of(urn, struct kvm_shared_msrs, urn);
212212
struct kvm_shared_msr_values *values;
213+
unsigned long flags;
213214

215+
/*
216+
* Disabling irqs at this point since the following code could be
217+
* interrupted and executed through kvm_arch_hardware_disable()
218+
*/
219+
local_irq_save(flags);
220+
if (locals->registered) {
221+
locals->registered = false;
222+
user_return_notifier_unregister(urn);
223+
}
224+
local_irq_restore(flags);
214225
for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
215226
values = &locals->values[slot];
216227
if (values->host != values->curr) {
217228
wrmsrl(shared_msrs_global.msrs[slot], values->host);
218229
values->curr = values->host;
219230
}
220231
}
221-
locals->registered = false;
222-
user_return_notifier_unregister(urn);
223232
}
224233

225234
static void shared_msr_update(unsigned slot, u32 msr)
@@ -1724,18 +1733,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
17241733

17251734
static u64 __get_kvmclock_ns(struct kvm *kvm)
17261735
{
1727-
struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
17281736
struct kvm_arch *ka = &kvm->arch;
1729-
s64 ns;
1737+
struct pvclock_vcpu_time_info hv_clock;
17301738

1731-
if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
1732-
u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1733-
ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
1734-
} else {
1735-
ns = ktime_get_boot_ns() + ka->kvmclock_offset;
1739+
spin_lock(&ka->pvclock_gtod_sync_lock);
1740+
if (!ka->use_master_clock) {
1741+
spin_unlock(&ka->pvclock_gtod_sync_lock);
1742+
return ktime_get_boot_ns() + ka->kvmclock_offset;
17361743
}
17371744

1738-
return ns;
1745+
hv_clock.tsc_timestamp = ka->master_cycle_now;
1746+
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
1747+
spin_unlock(&ka->pvclock_gtod_sync_lock);
1748+
1749+
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
1750+
&hv_clock.tsc_shift,
1751+
&hv_clock.tsc_to_system_mul);
1752+
return __pvclock_read_cycles(&hv_clock, rdtsc());
17391753
}
17401754

17411755
u64 get_kvmclock_ns(struct kvm *kvm)
@@ -2596,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
25962610
case KVM_CAP_PIT_STATE2:
25972611
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
25982612
case KVM_CAP_XEN_HVM:
2599-
case KVM_CAP_ADJUST_CLOCK:
26002613
case KVM_CAP_VCPU_EVENTS:
26012614
case KVM_CAP_HYPERV:
26022615
case KVM_CAP_HYPERV_VAPIC:
@@ -2623,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
26232636
#endif
26242637
r = 1;
26252638
break;
2639+
case KVM_CAP_ADJUST_CLOCK:
2640+
r = KVM_CLOCK_TSC_STABLE;
2641+
break;
26262642
case KVM_CAP_X86_SMM:
26272643
/* SMBASE is usually relocated above 1M on modern chipsets,
26282644
* and SMM handlers might indeed rely on 4G segment limits,
@@ -3415,14 +3431,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
34153431
};
34163432
case KVM_SET_VAPIC_ADDR: {
34173433
struct kvm_vapic_addr va;
3434+
int idx;
34183435

34193436
r = -EINVAL;
34203437
if (!lapic_in_kernel(vcpu))
34213438
goto out;
34223439
r = -EFAULT;
34233440
if (copy_from_user(&va, argp, sizeof va))
34243441
goto out;
3442+
idx = srcu_read_lock(&vcpu->kvm->srcu);
34253443
r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3444+
srcu_read_unlock(&vcpu->kvm->srcu, idx);
34263445
break;
34273446
}
34283447
case KVM_X86_SETUP_MCE: {
@@ -4103,9 +4122,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
41034122
struct kvm_clock_data user_ns;
41044123
u64 now_ns;
41054124

4106-
now_ns = get_kvmclock_ns(kvm);
4125+
local_irq_disable();
4126+
now_ns = __get_kvmclock_ns(kvm);
41074127
user_ns.clock = now_ns;
4108-
user_ns.flags = 0;
4128+
user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
4129+
local_irq_enable();
41094130
memset(&user_ns.pad, 0, sizeof(user_ns.pad));
41104131

41114132
r = -EFAULT;

include/uapi/linux/kvm.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,12 +972,19 @@ struct kvm_irqfd {
972972
__u8 pad[16];
973973
};
974974

975+
/* For KVM_CAP_ADJUST_CLOCK */
976+
977+
/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */
978+
#define KVM_CLOCK_TSC_STABLE 2
979+
975980
struct kvm_clock_data {
976981
__u64 clock;
977982
__u32 flags;
978983
__u32 pad[9];
979984
};
980985

986+
/* For KVM_CAP_SW_TLB */
987+
981988
#define KVM_MMU_FSL_BOOKE_NOHV 0
982989
#define KVM_MMU_FSL_BOOKE_HV 1
983990

virt/kvm/arm/pmu.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
305305
continue;
306306
type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
307307
& ARMV8_PMU_EVTYPE_EVENT;
308-
if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
308+
if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
309309
&& (enable & BIT(i))) {
310310
reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
311311
reg = lower_32_bits(reg);
@@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
379379
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
380380

381381
/* Software increment event doesn't need to be backed by a perf event */
382-
if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
382+
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
383+
select_idx != ARMV8_PMU_CYCLE_IDX)
383384
return;
384385

385386
memset(&attr, 0, sizeof(struct perf_event_attr));
@@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
391392
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
392393
attr.exclude_hv = 1; /* Don't count EL2 events */
393394
attr.exclude_host = 1; /* Don't count host events */
394-
attr.config = eventsel;
395+
attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
396+
ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
395397

396398
counter = kvm_pmu_get_counter_value(vcpu, select_idx);
397399
/* The initial sample period (overflow count) of an event. */

virt/kvm/async_pf.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ static void async_pf_execute(struct work_struct *work)
9191

9292
spin_lock(&vcpu->async_pf.lock);
9393
list_add_tail(&apf->link, &vcpu->async_pf.done);
94+
apf->vcpu = NULL;
9495
spin_unlock(&vcpu->async_pf.lock);
9596

9697
/*
@@ -113,13 +114,23 @@ static void async_pf_execute(struct work_struct *work)
113114

114115
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
115116
{
117+
spin_lock(&vcpu->async_pf.lock);
118+
116119
/* cancel outstanding work queue item */
117120
while (!list_empty(&vcpu->async_pf.queue)) {
118121
struct kvm_async_pf *work =
119122
list_first_entry(&vcpu->async_pf.queue,
120123
typeof(*work), queue);
121124
list_del(&work->queue);
122125

126+
/*
127+
* We know it's present in vcpu->async_pf.done, do
128+
* nothing here.
129+
*/
130+
if (!work->vcpu)
131+
continue;
132+
133+
spin_unlock(&vcpu->async_pf.lock);
123134
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
124135
flush_work(&work->work);
125136
#else
@@ -129,9 +140,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
129140
kmem_cache_free(async_pf_cache, work);
130141
}
131142
#endif
143+
spin_lock(&vcpu->async_pf.lock);
132144
}
133145

134-
spin_lock(&vcpu->async_pf.lock);
135146
while (!list_empty(&vcpu->async_pf.done)) {
136147
struct kvm_async_pf *work =
137148
list_first_entry(&vcpu->async_pf.done,

0 commit comments

Comments
 (0)