diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index e893318560db9f79300579ff01e73b07b5baae38..5ad0ec1f0e29f750eee5c70889df5427d7413556 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -156,7 +156,6 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 
 	/* Enable interrupts */
 	channel_set_bit(dw, MASK.XFER, dwc->mask);
-	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
 	channel_set_bit(dw, MASK.ERROR, dwc->mask);
 
 	dwc->initialized = true;
@@ -588,6 +587,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
 
 		spin_unlock_irqrestore(&dwc->lock, flags);
 	}
+
+	/* Re-enable interrupts */
+	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
 }
 
 /* ------------------------------------------------------------------------- */
@@ -618,11 +620,8 @@ static void dw_dma_tasklet(unsigned long data)
 			dwc_scan_descriptors(dw, dwc);
 	}
 
-	/*
-	 * Re-enable interrupts.
-	 */
+	/* Re-enable interrupts */
 	channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
 }
 
@@ -1261,6 +1260,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 int dw_dma_cyclic_start(struct dma_chan *chan)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
 	unsigned long		flags;
 
 	if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) {
@@ -1269,7 +1269,12 @@ int dw_dma_cyclic_start(struct dma_chan *chan)
 	}
 
 	spin_lock_irqsave(&dwc->lock, flags);
+
+	/* Enable interrupts to perform cyclic transfer */
+	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
+
 	dwc_dostart(dwc, dwc->cdesc->desc[0]);
+
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	return 0;
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index 4c30fdd092b3b1e5b7e6050b7e7d1afee6c11e57..358f9689a3f5ace77d3dfd601f1873b880704060 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -108,6 +108,10 @@ static const struct pci_device_id dw_pci_id_table[] = {
 
 	/* Haswell */
 	{ PCI_VDEVICE(INTEL, 0x9c60) },
+
+	/* Broadwell */
+	{ PCI_VDEVICE(INTEL, 0x9ce0) },
+
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, dw_pci_id_table);
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index d92d655494068992f5a689ff83604158115bee4d..e3d7fcb69b4c2e4ffc4221c8ea8ec4e360648fd7 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -113,6 +113,9 @@
 #define GET_NUM_REGN(x)		((x & 0x300000) >> 20) /* bits 20-21 */
 #define CHMAP_EXIST		BIT(24)
 
+/* CCSTAT register */
+#define EDMA_CCSTAT_ACTV	BIT(4)
+
 /*
  * Max of 20 segments per channel to conserve PaRAM slots
  * Also note that MAX_NR_SG should be atleast the no.of periods
@@ -1680,9 +1683,20 @@ static void edma_issue_pending(struct dma_chan *chan)
 	spin_unlock_irqrestore(&echan->vchan.lock, flags);
 }
 
+/*
+ * This limit exists to avoid a possible infinite loop when waiting for proof
+ * that a particular transfer is completed. This limit can be hit if there
+ * are large bursts to/from slow devices or the CPU is never able to catch
+ * the DMA hardware idle. On an AM335x transfering 48 bytes from the UART
+ * RX-FIFO, as many as 55 loops have been seen.
+ */
+#define EDMA_MAX_TR_WAIT_LOOPS 1000
+
 static u32 edma_residue(struct edma_desc *edesc)
 {
 	bool dst = edesc->direction == DMA_DEV_TO_MEM;
+	int loop_count = EDMA_MAX_TR_WAIT_LOOPS;
+	struct edma_chan *echan = edesc->echan;
 	struct edma_pset *pset = edesc->pset;
 	dma_addr_t done, pos;
 	int i;
@@ -1691,7 +1705,32 @@ static u32 edma_residue(struct edma_desc *edesc)
 	 * We always read the dst/src position from the first RamPar
 	 * pset. That's the one which is active now.
 	 */
-	pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst);
+	pos = edma_get_position(echan->ecc, echan->slot[0], dst);
+
+	/*
+	 * "pos" may represent a transfer request that is still being
+	 * processed by the EDMACC or EDMATC. We will busy wait until
+	 * any one of the situations occurs:
+	 *   1. the DMA hardware is idle
+	 *   2. a new transfer request is setup
+	 *   3. we hit the loop limit
+	 */
+	while (edma_read(echan->ecc, EDMA_CCSTAT) & EDMA_CCSTAT_ACTV) {
+		/* check if a new transfer request is setup */
+		if (edma_get_position(echan->ecc,
+				      echan->slot[0], dst) != pos) {
+			break;
+		}
+
+		if (!--loop_count) {
+			dev_dbg_ratelimited(echan->vchan.chan.device->dev,
+				"%s: timeout waiting for PaRAM update\n",
+				__func__);
+			break;
+		}
+
+		cpu_relax();
+	}
 
 	/*
 	 * Cyclic is simple. Just subtract pset[0].addr from pos.
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 1d5df2ef148b16d3c379a11e14a7da5283f9d5b8..21539d5c54c3d5c2d2dc3244650691bf414cf879 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -861,32 +861,42 @@ void ioat_timer_event(unsigned long data)
 			return;
 	}
 
+	spin_lock_bh(&ioat_chan->cleanup_lock);
+
+	/* handle the no-actives case */
+	if (!ioat_ring_active(ioat_chan)) {
+		spin_lock_bh(&ioat_chan->prep_lock);
+		check_active(ioat_chan);
+		spin_unlock_bh(&ioat_chan->prep_lock);
+		spin_unlock_bh(&ioat_chan->cleanup_lock);
+		return;
+	}
+
 	/* if we haven't made progress and we have already
 	 * acknowledged a pending completion once, then be more
 	 * forceful with a restart
 	 */
-	spin_lock_bh(&ioat_chan->cleanup_lock);
 	if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
 		__cleanup(ioat_chan, phys_complete);
 	else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) {
+		u32 chanerr;
+
+		chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+		dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
+		dev_warn(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
+			 status, chanerr);
+		dev_warn(to_dev(ioat_chan), "Active descriptors: %d\n",
+			 ioat_ring_active(ioat_chan));
+
 		spin_lock_bh(&ioat_chan->prep_lock);
 		ioat_restart_channel(ioat_chan);
 		spin_unlock_bh(&ioat_chan->prep_lock);
 		spin_unlock_bh(&ioat_chan->cleanup_lock);
 		return;
-	} else {
+	} else
 		set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
-		mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
-	}
-
 
-	if (ioat_ring_active(ioat_chan))
-		mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
-	else {
-		spin_lock_bh(&ioat_chan->prep_lock);
-		check_active(ioat_chan);
-		spin_unlock_bh(&ioat_chan->prep_lock);
-	}
+	mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
 	spin_unlock_bh(&ioat_chan->cleanup_lock);
 }