@@ -888,8 +888,109 @@ static int apic_set_affinity(struct irq_data *irqd,
	return err ? err : IRQ_SET_MASK_OK;
}

static void free_moved_vector(struct apic_chip_data *apicd)
{
	unsigned int vector = apicd->prev_vector;
	unsigned int cpu = apicd->prev_cpu;
	bool managed = apicd->is_managed;

	/*
	 * Managed interrupts are usually not migrated away
	 * from an online CPU, but CPU isolation 'managed_irq'
	 * can make that happen.
	 * 1) Activation does not take the isolation into account
	 *    to keep the code simple
	 * 2) Migration away from an isolated CPU can happen when
	 *    a non-isolated CPU which is in the calculated
	 *    affinity mask comes online.
	 */
	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
	irq_matrix_free(vector_matrix, cpu, vector, managed);
	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
	hlist_del_init(&apicd->clist);
	apicd->prev_vector = 0;
	apicd->move_in_progress = 0;
}

/*
 * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
 */
static void apic_force_complete_move(struct irq_data *irqd)
{
	unsigned int cpu = smp_processor_id();
	struct apic_chip_data *apicd;
	unsigned int vector;

	guard(raw_spinlock)(&vector_lock);
	apicd = apic_chip_data(irqd);
	if (!apicd)
		return;

	/*
	 * If prev_vector is empty or the descriptor is neither currently
	 * nor previously on the outgoing CPU no action required.
	 */
	vector = apicd->prev_vector;
	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
		return;

	/*
	 * This is tricky. If the cleanup of the old vector has not been
	 * done yet, then the following setaffinity call will fail with
	 * -EBUSY. This can leave the interrupt in a stale state.
	 *
	 * All CPUs are stuck in stop machine with interrupts disabled so
	 * calling __irq_complete_move() would be completely pointless.
	 *
	 * 1) The interrupt is in move_in_progress state. That means that we
	 *    have not seen an interrupt since the io_apic was reprogrammed to
	 *    the new vector.
	 *
	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
	 *    have not been processed yet.
	 */
	if (apicd->move_in_progress) {
		/*
		 * In theory there is a race:
		 *
		 * set_ioapic(new_vector) <-- Interrupt is raised before update
		 *                            is effective, i.e. it's raised on
		 *                            the old vector.
		 *
		 * So if the target cpu cannot handle that interrupt before
		 * the old vector is cleaned up, we get a spurious interrupt
		 * and in the worst case the ioapic irq line becomes stale.
		 *
		 * But in case of cpu hotplug this should be a non issue
		 * because if the affinity update happens right before all
		 * cpus rendezvous in stop machine, there is no way that the
		 * interrupt can be blocked on the target cpu because all cpus
		 * loops first with interrupts enabled in stop machine, so the
		 * old vector is not yet cleaned up when the interrupt fires.
		 *
		 * So the only way to run into this issue is if the delivery
		 * of the interrupt on the apic/system bus would be delayed
		 * beyond the point where the target cpu disables interrupts
		 * in stop machine. I doubt that it can happen, but at least
		 * there is a theoretical chance. Virtualization might be
		 * able to expose this, but AFAICT the IOAPIC emulation is not
		 * as stupid as the real hardware.
		 *
		 * Anyway, there is nothing we can do about that at this point
		 * w/o refactoring the whole fixup_irq() business completely.
		 * We print at least the irq number and the old vector number,
		 * so we have the necessary information when a problem in that
		 * area arises.
		 */
		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
			irqd->irq, vector);
	}
	free_moved_vector(apicd);
}

#else
# define apic_set_affinity NULL
# define apic_force_complete_move NULL
#endif

static int apic_retrigger_irq(struct irq_data *irqd)
@@ -923,39 +1024,16 @@ static void x86_vector_msi_compose_msg(struct irq_data *data,
}

static struct irq_chip lapic_controller = {
	.name = "APIC",
	.irq_ack = apic_ack_edge,
	.irq_set_affinity = apic_set_affinity,
	.irq_compose_msi_msg = x86_vector_msi_compose_msg,
	.irq_force_complete_move = apic_force_complete_move,
	.irq_retrigger = apic_retrigger_irq,
};

#ifdef CONFIG_SMP

static void free_moved_vector(struct apic_chip_data *apicd)
{
	unsigned int vector = apicd->prev_vector;
	unsigned int cpu = apicd->prev_cpu;
	bool managed = apicd->is_managed;

	/*
	 * Managed interrupts are usually not migrated away
	 * from an online CPU, but CPU isolation 'managed_irq'
	 * can make that happen.
	 * 1) Activation does not take the isolation into account
	 *    to keep the code simple
	 * 2) Migration away from an isolated CPU can happen when
	 *    a non-isolated CPU which is in the calculated
	 *    affinity mask comes online.
	 */
	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
	irq_matrix_free(vector_matrix, cpu, vector, managed);
	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
	hlist_del_init(&apicd->clist);
	apicd->prev_vector = 0;
	apicd->move_in_progress = 0;
}

static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
{
	struct apic_chip_data *apicd;
@@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *cfg)
	__vector_schedule_cleanup(apicd);
}

/*
 * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
 */
void irq_force_complete_move(struct irq_desc *desc)
{
	unsigned int cpu = smp_processor_id();
	struct apic_chip_data *apicd;
	struct irq_data *irqd;
	unsigned int vector;

	/*
	 * The function is called for all descriptors regardless of which
	 * irqdomain they belong to. For example if an IRQ is provided by
	 * an irq_chip as part of a GPIO driver, the chip data for that
	 * descriptor is specific to the irq_chip in question.
	 *
	 * Check first that the chip_data is what we expect
	 * (apic_chip_data) before touching it any further.
	 */
	irqd = irq_domain_get_irq_data(x86_vector_domain,
				       irq_desc_get_irq(desc));
	if (!irqd)
		return;

	raw_spin_lock(&vector_lock);
	apicd = apic_chip_data(irqd);
	if (!apicd)
		goto unlock;

	/*
	 * If prev_vector is empty or the descriptor is neither currently
	 * nor previously on the outgoing CPU no action required.
	 */
	vector = apicd->prev_vector;
	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
		goto unlock;

	/*
	 * This is tricky. If the cleanup of the old vector has not been
	 * done yet, then the following setaffinity call will fail with
	 * -EBUSY. This can leave the interrupt in a stale state.
	 *
	 * All CPUs are stuck in stop machine with interrupts disabled so
	 * calling __irq_complete_move() would be completely pointless.
	 *
	 * 1) The interrupt is in move_in_progress state. That means that we
	 *    have not seen an interrupt since the io_apic was reprogrammed to
	 *    the new vector.
	 *
	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
	 *    have not been processed yet.
	 */
	if (apicd->move_in_progress) {
		/*
		 * In theory there is a race:
		 *
		 * set_ioapic(new_vector) <-- Interrupt is raised before update
		 *                            is effective, i.e. it's raised on
		 *                            the old vector.
		 *
		 * So if the target cpu cannot handle that interrupt before
		 * the old vector is cleaned up, we get a spurious interrupt
		 * and in the worst case the ioapic irq line becomes stale.
		 *
		 * But in case of cpu hotplug this should be a non issue
		 * because if the affinity update happens right before all
		 * cpus rendezvous in stop machine, there is no way that the
		 * interrupt can be blocked on the target cpu because all cpus
		 * loops first with interrupts enabled in stop machine, so the
		 * old vector is not yet cleaned up when the interrupt fires.
		 *
		 * So the only way to run into this issue is if the delivery
		 * of the interrupt on the apic/system bus would be delayed
		 * beyond the point where the target cpu disables interrupts
		 * in stop machine. I doubt that it can happen, but at least
		 * there is a theoretical chance. Virtualization might be
		 * able to expose this, but AFAICT the IOAPIC emulation is not
		 * as stupid as the real hardware.
		 *
		 * Anyway, there is nothing we can do about that at this point
		 * w/o refactoring the whole fixup_irq() business completely.
		 * We print at least the irq number and the old vector number,
		 * so we have the necessary information when a problem in that
		 * area arises.
		 */
		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
			irqd->irq, vector);
	}
	free_moved_vector(apicd);
unlock:
	raw_spin_unlock(&vector_lock);
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Note, this is not accurate accounting, but at least good enough to