JustPaste
HomeCategoriesAboutDonateContactTerms of UsePrivacy Policy
JustPaste

Free online notepad — write and share instantly

Navigate

  • Home
  • Timeline
  • Categories

Info

  • About
  • Donate
  • Contact

Legal

  • Terms of Use
  • Privacy Policy

© 2026 JustPaste.app. All rights reserved.

Made with ♥ by JustPaste

Untitled Page | JustPaste.app
30 days ago1 views
👨‍💻Programming
/*
 * pi5_i2s_tdm_fixed.c
 *
 * Raspberry Pi 5 / RP1 Synopsys DesignWare I2S DMA driver
 * for 8-slot telephony-style frame (8 kênh thoại x 8-bit).
 *
 * === MÔ HÌNH PHẦN CỨNG ===
 *
 *  Silicon map:
 *    Left  32-bit word = [CH0_byte][CH1_byte][CH2_byte][CH3_byte]
 *    Right 32-bit word = [CH4_byte][CH5_byte][CH6_byte][CH7_byte]
 *
 *  Một frame I2S = 8 byte = 1 chu kỳ WS = 125µs @ 8 kHz
 *  BCLK = 8000 * 64 bit = 512 kHz  (KHÔNG phải 2MHz)
 *
 *  DMA period = 32 frame = 256 byte = 4ms
 *  DMA buffer = 2 period = 512 byte (double buffer)
 *
 * === MỤC TIÊU SWAP ===
 *
 *  Swap byte chẵn/lẻ trong mỗi word 32-bit để test loopback thoại:
 *    b[0]<->b[1], b[2]<->b[3], b[4]<->b[5], b[6]<->b[7]
 *  (Tương đương hoán đổi từng cặp kênh thoại kề nhau)
 *
 * === CÁC LỖI ĐÃ FIX SO VỚI CODE GỐC ===
 *
 *  1. DMA FIFO address: dùng I2S_LRBR_LTHR(0)=0x020 thay vì I2S_RXDMA/TXDMA
 *     (I2S_RXDMA là control register, không phải data FIFO port)
 *  2. BCLK comment và logic: 512kHz = 8000*64, KHÔNG phải 2MHz
 *  3. Byte swap: swap chẵn/lẻ đúng nghĩa (b0<->b1) thay vì reverse endian
 *  4. DMA callback period index: derived from a per-direction atomic callback
 *     sequence counter (seq % periods); dmaengine_tx_status residue is NOT used
 *  5. I2S_CCR: dùng giá trị đúng cho 32-bit frame (SCLK_CYCLES = 32)
 *  6. dma_slave_config: thêm dst_maxburst cho TX, src_maxburst cho RX
 *  7. pcm_config_fd: reset đúng thứ tự, enable channel 0 rõ ràng
 *  8. Thêm sw_params ioctl để userspace có thể set bridge/swap runtime
 *
 * GPIO:
 *   GPIO18 = BCLK (I2S1_CLK)
 *   GPIO19 = FSYNC/WS (I2S1_WS)
 *   GPIO20 = SDIN  (I2S1_SDI) — nhận từ silicon
 *   GPIO21 = SDOUT (I2S1_SDO) — gửi về silicon
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/uaccess.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/wait.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/poll.h>
#include <linux/ktime.h>
#include <linux/workqueue.h>
#include <linux/clk.h>
#include <linux/atomic.h>

#define DRV_NAME "si_pcm_dma8"

/* =========================================================
 * RP1 DW-I2S register offsets
 * ========================================================= */
/*
 * Global block registers. All offsets in this section are relative to the
 * I2S instance base (pcm_dma8_dev.base) and are accessed via pcm_rd()/pcm_wr().
 */
#define I2S_IER             0x000   /* Global enable */
#define I2S_IRER            0x004   /* RX block enable */
#define I2S_ITER            0x008   /* TX block enable */
#define I2S_CER             0x00c   /* Clock enable */
#define I2S_CCR             0x010   /* Clock config: SCLK cycles per frame side */
#define I2S_RXFFR           0x014   /* RX FIFO reset */
#define I2S_TXFFR           0x018   /* TX FIFO reset */

/*
 * Per-channel registers (channel index x=0..3).
 * Each channel occupies a 0x40-byte stride; x is parenthesised so that
 * expressions may be passed as the index.
 */
#define I2S_LRBR_LTHR(x)   (0x40*(x) + 0x020)  /* Left RX buf / Left TX hold (DATA FIFO) */
#define I2S_RRBR_RTHR(x)   (0x40*(x) + 0x024)  /* Right RX buf / Right TX hold (DATA FIFO) */
#define I2S_RER(x)          (0x40*(x) + 0x028)  /* RX channel enable */
#define I2S_TER(x)          (0x40*(x) + 0x02c)  /* TX channel enable */
#define I2S_RCR(x)          (0x40*(x) + 0x030)  /* RX word length */
#define I2S_TCR(x)          (0x40*(x) + 0x034)  /* TX word length */
#define I2S_ISR(x)          (0x40*(x) + 0x038)  /* Interrupt status */
#define I2S_IMR(x)          (0x40*(x) + 0x03c)  /* Interrupt mask */
#define I2S_ROR(x)          (0x40*(x) + 0x040)  /* RX overrun clear */
#define I2S_TOR(x)          (0x40*(x) + 0x044)  /* TX overrun clear */
#define I2S_RFCR(x)         (0x40*(x) + 0x048)  /* RX FIFO config (threshold) */
#define I2S_TFCR(x)         (0x40*(x) + 0x04c)  /* TX FIFO config (threshold) */
#define I2S_RFF(x)          (0x40*(x) + 0x050)  /* RX FIFO flush */
#define I2S_TFF(x)          (0x40*(x) + 0x054)  /* TX FIFO flush */

/*
 * DMA registers
 *
 * NOTE(review): the "NOT data" remarks below are the original author's
 * reading. The mainline DesignWare driver (sound/soc/dwc/dwc-i2s.c) hands
 * base + I2S_RXDMA / base + I2S_TXDMA to the DMA engine as the data port
 * address — confirm against the DesignWare I2S databook before relying on
 * either interpretation.
 */
#define I2S_RXDMA           0x01c0  /* RX DMA control (NOT data!) */
#define I2S_RRXDMA          0x01c4  /* RX DMA reset */
#define I2S_TXDMA           0x01c8  /* TX DMA control (NOT data!) */
#define I2S_RTXDMA          0x01cc  /* TX DMA reset */
#define I2S_DMACR           0x0200  /* DMA control: enable RX/TX block DMA */

/* Bit definitions */
#define I2S_IER_IEN         BIT(0)   /* I2S_IER: global enable bit */
#define I2S_DMAEN_RXBLOCK   BIT(16)  /* I2S_DMACR: RX block DMA enable */
#define I2S_DMAEN_TXBLOCK   BIT(17)  /* I2S_DMACR: TX block DMA enable */
#define I2S_ISR_RXDA        BIT(0)   /* RX data available */
#define I2S_ISR_RXFO        BIT(1)   /* RX FIFO overrun */
#define I2S_ISR_TXFE        BIT(4)   /* TX FIFO empty */
#define I2S_ISR_TXFO        BIT(5)   /* TX FIFO overrun */
#define I2S_RER_RXCHEN      BIT(0)   /* I2S_RER(x): channel RX enable */
#define I2S_TER_TXCHEN      BIT(0)   /* I2S_TER(x): channel TX enable */

/*
 * DW-I2S word length encoding (RCR/TCR bits[2:0]):
 *   0x01 = 12-bit, 0x02 = 16-bit, 0x03 = 20-bit, 0x04 = 24-bit, 0x05 = 32-bit
 */
#define I2S_XFER_RES_32BIT  0x05

/*
 * CCR (Clock Configuration Register) layout on the DesignWare I2S block:
 *   bits[2:0] = SCLKG : sclk gating (0 = no gating)
 *   bits[4:3] = WSS   : word-select size, in sclk cycles per channel side
 *                       (0x0 = 16, 0x1 = 24, 0x2 = 32)
 *
 * A 32-bit word per side (left + right = 64 bits/frame) needs WSS = 0x2
 * placed in bits[4:3] with gating left disabled, i.e. a register value of
 * 0x10. This matches the value the mainline dwc-i2s driver programs for
 * 32-bit samples. Writing a bare 0x02 would instead set the gating field
 * and leave WSS at 16 cycles per side.
 */
#define I2S_CCR_WSS_SHIFT   3
#define I2S_CCR_WSS_32      (0x2u << I2S_CCR_WSS_SHIFT) /* 0x10: 32 cycles/side → 64 bit/frame */

/*
 * RP1 GPIO override controls (used to invert BCLK/FSYNC when requested).
 *
 * RP1_IO_BANK0_OFFSET/SIZE describe the IO bank window; each GPIO has an
 * 8-byte register pair and RP1_GPIO_CTRL() selects the control word at
 * offset +4 within that pair. INOVER (bits[17:16]) and OUTOVER (bits[13:12])
 * are 2-bit fields; RP1_GPIO_OVER_INVERT is the field value written by
 * rp1_apply_inversion() to invert the signal.
 */
#define RP1_IO_BANK0_OFFSET         0x000d0000
#define RP1_IO_BANK0_SIZE           0x00008000
#define RP1_GPIO_CTRL(gpio)         ((gpio) * 8 + 0x004)
#define RP1_GPIO_CTRL_INOVER_SHIFT  16
#define RP1_GPIO_CTRL_INOVER_MASK   (0x3u << RP1_GPIO_CTRL_INOVER_SHIFT)
#define RP1_GPIO_CTRL_OUTOVER_SHIFT 12
#define RP1_GPIO_CTRL_OUTOVER_MASK  (0x3u << RP1_GPIO_CTRL_OUTOVER_SHIFT)
#define RP1_GPIO_OVER_INVERT        0x1u

/* =========================================================
 * Frame model:
 *
 *   DMA RX buffer layout (raw từ DW-I2S FIFO):
 *   ┌─────────────────────────────────────────────────────┐
 *   │ frame 0: [L3][L2][L1][L0][R3][R2][R1][R0]          │
 *   │ frame 1: [L3][L2][L1][L0][R3][R2][R1][R0]          │
 *   │  ...                                                │
 *   │ frame 31: ...                                       │
 *   └─────────────────────────────────────────────────────┘
 *
 *   DW-I2S ghi left-word trước (4 byte, MSB-first = little-endian trong RAM
 *   do DMA 32-bit), sau đó right-word (4 byte).
 *
 *   Silicon map (ví dụ):
 *     Left word  bits[31:24]=CH0, bits[23:16]=CH1, bits[15:8]=CH2, bits[7:0]=CH3
 *     Right word bits[31:24]=CH4, bits[23:16]=CH5, bits[15:8]=CH6, bits[7:0]=CH7
 *
 *   Trong RAM (little-endian ARM):
 *     byte[0]=CH3(LSB of left), byte[1]=CH2, byte[2]=CH1, byte[3]=CH0(MSB)
 *     byte[4]=CH7(LSB of right), byte[5]=CH6, byte[6]=CH5, byte[7]=CH4(MSB)
 *
 *   Sau khi swap chẵn/lẻ (b[0]<->b[1], b[2]<->b[3], ...):
 *     byte[0]=CH2, byte[1]=CH3, byte[2]=CH0, byte[3]=CH1,
 *     byte[4]=CH6, byte[5]=CH7, byte[6]=CH4, byte[7]=CH5
 *   → Kênh thoại được hoán đổi theo cặp.
 * ========================================================= */

/* Frame and period geometry. */
#define I2S_FRAME_BYTES         8    /* 4 byte left + 4 byte right */
#define PCM_CHANNELS            8    /* 8 voice channels */
#define PCM_FRAMES_PER_PERIOD   32   /* 32 frame/period → 4ms @ 8kHz */
#define PCM_PERIOD_BYTES        (I2S_FRAME_BYTES * PCM_FRAMES_PER_PERIOD) /* 256 */
#define DEFAULT_PERIODS         2    /* double buffer */

/* Ring sizes, in frames. */
#define RX_RING_FRAMES          512
#define TX_RING_FRAMES          512

/*
 * Userspace ioctls for changing swap/bridge settings at runtime and for
 * reading counters. The SET_* commands take a pointer to an int that is
 * treated as a boolean; GET_STATS fills a struct pcm_stats_user.
 */
#define PCM_IOCTL_MAGIC         'P'
#define PCM_IOCTL_SET_SWAP_RX   _IOW(PCM_IOCTL_MAGIC, 1, int)
#define PCM_IOCTL_SET_SWAP_TX   _IOW(PCM_IOCTL_MAGIC, 2, int)
#define PCM_IOCTL_SET_BRIDGE    _IOW(PCM_IOCTL_MAGIC, 3, int)
#define PCM_IOCTL_GET_STATS     _IOR(PCM_IOCTL_MAGIC, 4, struct pcm_stats_user)

/*
 * Counter snapshot copied to userspace by PCM_IOCTL_GET_STATS.
 * The layout is part of the ioctl ABI (its size is encoded in the command
 * number), so fields must not be reordered or resized.
 */
struct pcm_stats_user {
	__u64 rx_frames;      /* frames pushed to the RX ring */
	__u64 tx_frames;      /* frames written into TX periods */
	__u64 rx_overruns;    /* RX ring was full; oldest frame dropped */
	__u64 tx_underruns;   /* TX ring was empty; silence frame sent */
	__u64 fifo_rxerr;     /* RX FIFO overrun flags seen in I2S_ISR(0) */
	__u64 fifo_txerr;     /* TX FIFO overrun flags seen in I2S_ISR(0) */
	__u64 bridge_periods; /* RX periods handled while bridge mode was on */
};

/* One I2S frame as raw bytes: I2S_FRAME_BYTES bytes (left word then right word). */
struct frame8 {
	u8 b[I2S_FRAME_BYTES];
};

/*
 * Fixed-capacity circular queue of frames.
 * All index/count fields are protected by @lock; @q is woken after every
 * successful push or pop so both readers and writers can sleep on it.
 */
struct ring8 {
	spinlock_t     lock;     /* guards rd_idx, wr_idx, count and buf contents */
	wait_queue_head_t q;     /* waiters for "not empty" / "not full" */
	struct frame8 *buf;      /* storage for @capacity frames */
	u32            rd_idx;   /* next slot to pop */
	u32            wr_idx;   /* next slot to fill */
	u32            count;    /* frames currently queued */
	u32            capacity; /* total number of slots in @buf */
};

/* Per-device driver state. */
struct pcm_dma8_dev {
	struct device   *dev;        /* platform device, used for logging */
	struct device   *dma_dev;
	void __iomem    *base;       /* I2S register window (pcm_rd/pcm_wr) */
	void __iomem    *gpio_base;  /* RP1 GPIO control window; may be NULL */

	/* GPIO18/19 control words saved before the inversion override is applied */
	u32   saved_gpio18_ctrl;
	u32   saved_gpio19_ctrl;
	bool  gpio_overrides_applied;

	struct clk *clk;
	atomic_t    stopping;        /* non-zero once teardown has begun */

	/* DMA */
	struct dma_chan                *rx_chan;
	struct dma_chan                *tx_chan;
	struct dma_async_tx_descriptor *rx_desc;
	struct dma_async_tx_descriptor *tx_desc;
	dma_cookie_t rx_cookie;
	dma_cookie_t tx_cookie;

	/* RX DMA buffer: CPU pointer, bus address and total size */
	void       *rx_buf_cpu;
	dma_addr_t  rx_buf_dma;
	size_t      rx_buf_bytes;

	/* TX DMA buffer: CPU pointer, bus address and total size */
	void       *tx_buf_cpu;
	dma_addr_t  tx_buf_dma;
	size_t      tx_buf_bytes;

	u32    period_frames;        /* frames per DMA period */
	u32    periods;              /* number of periods in each DMA buffer */
	size_t period_bytes;         /* period_frames * I2S_FRAME_BYTES */

	/* Sequence counters incremented once per DMA callback */
	atomic_t rx_cb_seq;
	atomic_t tx_cb_seq;

	/* Ring buffers exchanged with userspace */
	struct ring8 rx_ring;
	struct ring8 tx_ring;

	/* Bridge buffer (processed RX period → copied into TX) */
	spinlock_t bridge_lock;
	u8        *bridge_period;
	size_t     bridge_period_bytes;

	/* Runtime config (may be changed through ioctl) */
	spinlock_t cfg_lock;
	bool  swap_rx_bytes;  /* reverse byte order within each 32-bit RX word */
	bool  swap_tx_bytes;  /* reverse byte order within each 32-bit TX word */
	bool  bridge_mode;    /* true: loopback RX→TX (swap + echo) */
	u8    tx_silence;     /* fill byte used when no TX data is available */

	/* Counters */
	atomic64_t rx_frames;
	atomic64_t tx_frames;
	atomic64_t rx_overruns;
	atomic64_t tx_underruns;
	atomic64_t fifo_rxerr;
	atomic64_t fifo_txerr;
	atomic64_t bridge_periods;

	struct miscdevice miscdev;

	/* Stats work */
	struct delayed_work stats_work;
	spinlock_t stats_lock;
	u64 _rx_frames_snap, _tx_frames_snap;
	u64 stats_last_ns;
};

/* =========================================================
 * Module parameters
 * ========================================================= */
/*
 * Tunable defaults.
 * NOTE(review): the module_param() registrations are not visible in this
 * part of the file — presumably they follow further down; confirm.
 */
static int  i2s_index     = 1;       /* 0=I2S0, 1=I2S1, 2=I2S2 */
static int  i2s_master    = 0;       /* 0=slave, 1=master */
static int  sample_rate   = 8000;    /* frame rate in Hz; bit clock is sample_rate * 64 */
static int  fifo_th       = 8;       /* FIFO threshold (1..16) */
static int  period_frames = PCM_FRAMES_PER_PERIOD; /* <= 0 falls back to the default */
static int  periods       = DEFAULT_PERIODS;       /* <= 1 falls back to the default */
static int  tx_silence    = 0xD5;    /* TX fill byte; only the low 8 bits are used */
static bool swap_rx       = false;   /* initial value of swap_rx_bytes */
static bool swap_tx       = false;   /* initial value of swap_tx_bytes */
static bool bridge_mode   = true;    /* initial value of bridge_mode (RX→TX loopback) */
static bool invert_bclk   = false;   /* invert GPIO18 via the RP1 override field */
static bool invert_fsync  = false;   /* invert GPIO19 via the RP1 override field */
static int  debug         = 0;       /* >= 1 enables the RX FIFO overrun warning */
static bool start_pcm     = true;

/* =========================================================
 * Helpers
 * ========================================================= */
static inline bool pcm_stopping(struct pcm_dma8_dev *d)
{
	return !d || atomic_read(&d->stopping);
}

/* Read the 32-bit I2S register at byte offset @reg from the mapped base. */
static inline u32 pcm_rd(struct pcm_dma8_dev *d, u32 reg)
{
	void __iomem *addr = d->base + reg;

	return readl(addr);
}

/* Write @val to the 32-bit I2S register at byte offset @reg from the mapped base. */
static inline void pcm_wr(struct pcm_dma8_dev *d, u32 reg, u32 val)
{
	void __iomem *addr = d->base + reg;

	writel(val, addr);
}

/* =========================================================
 * Byte-swap helper:
 *   Exchanges the two bytes of every adjacent pair in a frame
 *   (0<->1, 2<->3, 4<->5, 6<->7).
 *   This is an "odd-even swap", NOT an endianness reversal.
 *
 *   Example for an 8-byte frame:
 *     Input:  [A B C D E F G H]
 *     Output: [B A D C F E H G]
 *
 *   Effect: neighbouring voice channels trade places pairwise.
 * ========================================================= */
static void frame_odd_even_swap(struct frame8 *f)
{
	u8 *pair = f->b;
	u8 *end  = f->b + I2S_FRAME_BYTES;

	while (pair < end) {
		u8 first = pair[0];

		pair[0] = pair[1];
		pair[1] = first;
		pair += 2;
	}
}

/*
 * Copy frame @frame_idx out of an RX period buffer into @out.
 *
 * The period is treated as an array of I2S_FRAME_BYTES-byte frames, each
 * holding the left 32-bit word followed by the right 32-bit word.
 *
 * When @endian_swap is set, the byte order inside each 32-bit word is
 * reversed on the way out (b[0]<->b[3], b[1]<->b[2] for the left word and
 * b[4]<->b[7], b[5]<->b[6] for the right word). Callers pass the device's
 * swap_rx_bytes setting here; use it when the peer places channel 0 in the
 * most significant byte of the word.
 */
static void read_rx_frame(const u8 *rx_period, u32 frame_idx,
			   struct frame8 *out, bool endian_swap)
{
	const u8 *src = rx_period + frame_idx * I2S_FRAME_BYTES;
	u32 w;

	if (!endian_swap) {
		memcpy(out->b, src, I2S_FRAME_BYTES);
		return;
	}

	/* Two 4-byte words per frame; emit each one byte-reversed. */
	for (w = 0; w < I2S_FRAME_BYTES; w += 4) {
		out->b[w + 0] = src[w + 3];
		out->b[w + 1] = src[w + 2];
		out->b[w + 2] = src[w + 1];
		out->b[w + 3] = src[w + 0];
	}
}

/*
 * Store frame @f at slot @frame_idx of a TX period buffer.
 *
 * When @endian_swap is set, the byte order inside each 32-bit word is
 * reversed before the frame is stored. The frame is staged in a local
 * buffer first, so @f itself is never modified.
 */
static void write_tx_frame(u8 *tx_period, u32 frame_idx,
			   const struct frame8 *f, bool endian_swap)
{
	u8 staged[I2S_FRAME_BYTES];
	u32 w;

	if (endian_swap) {
		/* Two 4-byte words per frame; stage each one byte-reversed. */
		for (w = 0; w < I2S_FRAME_BYTES; w += 4) {
			staged[w + 0] = f->b[w + 3];
			staged[w + 1] = f->b[w + 2];
			staged[w + 2] = f->b[w + 1];
			staged[w + 3] = f->b[w + 0];
		}
	} else {
		memcpy(staged, f->b, I2S_FRAME_BYTES);
	}

	memcpy(tx_period + frame_idx * I2S_FRAME_BYTES, staged, I2S_FRAME_BYTES);
}

/* =========================================================
 * Ring buffer
 * ========================================================= */
/*
 * Initialise ring @r with room for @cap frames.
 *
 * Storage is device-managed (devm), so it is released together with @dev.
 *
 * Returns 0 on success, -EINVAL if @cap is zero, -ENOMEM if the storage
 * cannot be allocated.
 */
static int ring_init(struct device *dev, struct ring8 *r, u32 cap)
{
	/*
	 * A zero-capacity ring must be rejected: a zero-sized devm allocation
	 * does not return NULL, and every index update in ring_push()/
	 * ring_pop() computes "% capacity", which would divide by zero.
	 */
	if (!cap)
		return -EINVAL;

	r->buf = devm_kcalloc(dev, cap, sizeof(*r->buf), GFP_KERNEL);
	if (!r->buf)
		return -ENOMEM;

	spin_lock_init(&r->lock);
	init_waitqueue_head(&r->q);
	r->rd_idx = r->wr_idx = r->count = 0;
	r->capacity = cap;
	return 0;
}

/*
 * Append frame @f to ring @r.
 *
 * The push never fails: when the ring is already full the oldest queued
 * frame is discarded to make room and, if @overrun is non-NULL, that counter
 * is incremented. Waiters on the ring's wait queue are woken afterwards.
 *
 * Always returns true.
 */
static bool ring_push(struct ring8 *r, const struct frame8 *f,
		      atomic64_t *overrun)
{
	unsigned long irqflags;

	spin_lock_irqsave(&r->lock, irqflags);

	if (r->count == r->capacity) {
		/* Full: retire the oldest entry before storing the new one. */
		r->rd_idx = (r->rd_idx + 1) % r->capacity;
		r->count -= 1;
		if (overrun != NULL)
			atomic64_inc(overrun);
	}

	r->buf[r->wr_idx] = *f;
	r->wr_idx = (r->wr_idx + 1) % r->capacity;
	r->count += 1;

	spin_unlock_irqrestore(&r->lock, irqflags);

	wake_up_interruptible(&r->q);
	return true;
}

/*
 * Remove the oldest frame from ring @r into @f.
 *
 * Returns true and wakes the ring's waiters when a frame was taken;
 * returns false (leaving @f untouched) when the ring is empty.
 */
static bool ring_pop(struct ring8 *r, struct frame8 *f)
{
	unsigned long irqflags;
	bool got;

	spin_lock_irqsave(&r->lock, irqflags);
	got = (r->count != 0);
	if (got) {
		*f = r->buf[r->rd_idx];
		r->rd_idx = (r->rd_idx + 1) % r->capacity;
		r->count -= 1;
	}
	spin_unlock_irqrestore(&r->lock, irqflags);

	if (!got)
		return false;

	wake_up_interruptible(&r->q);
	return true;
}

/* Return the number of frames queued in @r, sampled under the ring lock. */
static u32 ring_count_safe(struct ring8 *r)
{
	unsigned long irqflags;
	u32 queued;

	spin_lock_irqsave(&r->lock, irqflags);
	queued = r->count;
	spin_unlock_irqrestore(&r->lock, irqflags);

	return queued;
}

/* =========================================================
 * GPIO inversion (BCLK / FSYNC)
 * ========================================================= */
/*
 * Read-modify-write the control word of @gpio: bits selected by @mask are
 * replaced with the corresponding bits of @val, all others are preserved.
 * Does nothing when the GPIO window is not mapped.
 */
static void rp1_gpio_update(struct pcm_dma8_dev *d, int gpio,
			    u32 mask, u32 val)
{
	void __iomem *ctrl;
	u32 current_word, updated_word;

	if (!d->gpio_base)
		return;

	ctrl = d->gpio_base + RP1_GPIO_CTRL(gpio);

	current_word = readl(ctrl);
	updated_word = (current_word & ~mask) | (val & mask);
	writel(updated_word, ctrl);
}

static void rp1_apply_inversion(struct pcm_dma8_dev *d)
{
	u32 bclk_mask, fsync_mask;
	u32 bclk_val = 0, fsync_val = 0;

	if (!d->gpio_base || d->gpio_overrides_applied)
		return;

	d->saved_gpio18_ctrl = readl(d->gpio_base + RP1_GPIO_CTRL(18));
	d->saved_gpio19_ctrl = readl(d->gpio_base + RP1_GPIO_CTRL(19));

	if (i2s_master) {
		bclk_mask  = RP1_GPIO_CTRL_OUTOVER_MASK;
		fsync_mask = RP1_GPIO_CTRL_OUTOVER_MASK;
		if (invert_bclk)
			bclk_val  = RP1_GPIO_OVER_INVERT << RP1_GPIO_CTRL_OUTOVER_SHIFT;
		if (invert_fsync)
			fsync_val = RP1_GPIO_OVER_INVERT << RP1_GPIO_CTRL_OUTOVER_SHIFT;
	} else {
		bclk_mask  = RP1_GPIO_CTRL_INOVER_MASK;
		fsync_mask = RP1_GPIO_CTRL_INOVER_MASK;
		if (invert_bclk)
			bclk_val  = RP1_GPIO_OVER_INVERT << RP1_GPIO_CTRL_INOVER_SHIFT;
		if (invert_fsync)
			fsync_val = RP1_GPIO_OVER_INVERT << RP1_GPIO_CTRL_INOVER_SHIFT;
	}

	rp1_gpio_update(d, 18, bclk_mask, bclk_val);
	rp1_gpio_update(d, 19, fsync_mask, fsync_val);
	d->gpio_overrides_applied = true;

	dev_info(d->dev, "GPIO invert: bclk=%d fsync=%d mode=%s\n",
		 invert_bclk, invert_fsync, i2s_master ? "OUTOVER" : "INOVER");
}

/*
 * Undo rp1_apply_inversion(): write back the saved GPIO18/GPIO19 control
 * words and clear the "applied" flag. Safe to call with a NULL device, an
 * unmapped GPIO window, or when nothing was applied.
 */
static void rp1_restore_inversion(struct pcm_dma8_dev *d)
{
	if (!d)
		return;
	if (!d->gpio_base || !d->gpio_overrides_applied)
		return;

	writel(d->saved_gpio18_ctrl, d->gpio_base + RP1_GPIO_CTRL(18));
	writel(d->saved_gpio19_ctrl, d->gpio_base + RP1_GPIO_CTRL(19));

	d->gpio_overrides_applied = false;
}

/* =========================================================
 * I2S peripheral control
 * ========================================================= */
/*
 * Quiesce the I2S block.
 *
 * Disables the TX/RX blocks, turns off and resets the block's DMA
 * interface, disables all four channels (reading the overrun registers to
 * clear any latched flags), then gates the clock and the global enable.
 * A short delay follows so the block can settle. No-op when the device or
 * its register window is missing.
 */
static void pcm_hw_stop(struct pcm_dma8_dev *d)
{
	u32 dma;
	int i;

	if (!d || !d->base)
		return;

	/* Disable TX/RX blocks */
	pcm_wr(d, I2S_ITER, 0);
	pcm_wr(d, I2S_IRER, 0);

	/* Disable DMA (only the two block-enable bits are cleared) */
	dma = pcm_rd(d, I2S_DMACR);
	dma &= ~(I2S_DMAEN_TXBLOCK | I2S_DMAEN_RXBLOCK);
	pcm_wr(d, I2S_DMACR, dma);

	/* Reset DMA engines */
	pcm_wr(d, I2S_RTXDMA, 1);
	pcm_wr(d, I2S_RRXDMA, 1);

	/* Disable all channels */
	for (i = 0; i < 4; i++) {
		pcm_wr(d, I2S_TER(i), 0);
		pcm_wr(d, I2S_RER(i), 0);
		(void)pcm_rd(d, I2S_TOR(i));  /* clear overrun */
		(void)pcm_rd(d, I2S_ROR(i));
	}

	/* Clock and global enable go last, after everything else is off */
	pcm_wr(d, I2S_CER, 0);
	pcm_wr(d, I2S_IER, 0);
	udelay(30);
}

/*
 * Reset the block-level TX/RX FIFOs, flush the per-channel FIFOs of all
 * four channels and read the overrun registers to clear latched overrun
 * flags. Ends with a short settle delay.
 */
static void pcm_clear_fifos(struct pcm_dma8_dev *d)
{
	int i;

	/* Block-level FIFO resets */
	pcm_wr(d, I2S_TXFFR, 1);
	pcm_wr(d, I2S_RXFFR, 1);

	/* Per-channel flush; the reads clear the overrun status */
	for (i = 0; i < 4; i++) {
		pcm_wr(d, I2S_RFF(i), 1);
		pcm_wr(d, I2S_TFF(i), 1);
		(void)pcm_rd(d, I2S_TOR(i));
		(void)pcm_rd(d, I2S_ROR(i));
	}

	udelay(30);
}

/*
 * Configure the DW-I2S block for 32-bit words on channel 0.
 *
 * The block is stopped and its FIFOs cleared first. Channel 0 is then set
 * to 32-bit RX/TX word length, the FIFO thresholds are programmed from the
 * fifo_th option, the channel interrupts are masked, and I2S_CCR is written
 * with I2S_CCR_WSS_32 (32 SCLK cycles per channel side, i.e. 64 SCLK cycles
 * per frame → BCLK = 8000 * 64 = 512 kHz at the default rate).
 *
 * Only channel 0 is configured (left + right = 8 bytes per frame).
 *
 * In master mode the bit clock rate is requested from the clock framework;
 * a failure there is logged but does not fail the configuration.
 *
 * Returns 0 on success, -EINVAL when fifo_th is outside 1..16.
 */
static int pcm_hw_config(struct pcm_dma8_dev *d)
{
	int ret = 0;

	if (fifo_th < 1 || fifo_th > 16)
		return -EINVAL;

	pcm_hw_stop(d);
	pcm_clear_fifos(d);

	/* Channel 0: 32-bit word length */
	pcm_wr(d, I2S_RCR(0), I2S_XFER_RES_32BIT);
	pcm_wr(d, I2S_TCR(0), I2S_XFER_RES_32BIT);

	/* FIFO thresholds (0-based: value N → threshold N+1) */
	pcm_wr(d, I2S_RFCR(0), (u32)(fifo_th - 1));
	pcm_wr(d, I2S_TFCR(0), (u32)(fifo_th - 1));

	/*
	 * Mask interrupts — the data path uses DMA, not IRQs.
	 * 0x33 covers bits 0, 1, 4 and 5, the same positions as the
	 * I2S_ISR_RXDA/RXFO/TXFE/TXFO status bits.
	 */
	pcm_wr(d, I2S_IMR(0), 0x33);

	/*
	 * CCR: word-select size of 32 cycles per side
	 * → BCLK = FS * 2 * 32 = 8000 * 64 = 512 kHz
	 */
	pcm_wr(d, I2S_CCR, I2S_CCR_WSS_32);

	if (i2s_master) {
		/* Master: request the bit clock (sample_rate * 64) from the clock framework */
		unsigned long bitclk = (unsigned long)sample_rate * 64UL;

		ret = clk_set_rate(d->clk, bitclk);
		if (ret)
			dev_warn(d->dev,
				 "clk_set_rate(%lu Hz) failed: %d (tiếp tục nếu dùng external clock)\n",
				 bitclk, ret);
		else
			dev_info(d->dev, "Master BCLK set: %lu Hz\n", bitclk);
	}

	/* A clk_set_rate() failure is deliberately not propagated */
	return 0;
}

/*
 * Start the I2S block.
 *
 * Clears the FIFOs, re-programs channel 0 word length and FIFO thresholds,
 * enables channel 0 for RX and TX, turns on the block DMA requests and
 * finally enables the global, RX, TX and clock enables in that order.
 * Relies on fifo_th having been validated by pcm_hw_config().
 */
static void pcm_hw_start(struct pcm_dma8_dev *d)
{
	u32 dma;

	pcm_clear_fifos(d);

	/* Re-apply word length and thresholds after the flush */
	pcm_wr(d, I2S_RCR(0), I2S_XFER_RES_32BIT);
	pcm_wr(d, I2S_TCR(0), I2S_XFER_RES_32BIT);
	pcm_wr(d, I2S_RFCR(0), (u32)(fifo_th - 1));
	pcm_wr(d, I2S_TFCR(0), (u32)(fifo_th - 1));

	/* Enable channel 0 */
	pcm_wr(d, I2S_RER(0), I2S_RER_RXCHEN);
	pcm_wr(d, I2S_TER(0), I2S_TER_TXCHEN);

	/* Enable DMA block (other I2S_DMACR bits are preserved) */
	dma = pcm_rd(d, I2S_DMACR);
	dma |= I2S_DMAEN_RXBLOCK | I2S_DMAEN_TXBLOCK;
	pcm_wr(d, I2S_DMACR, dma);

	/* Enable global + RX + TX + clock */
	pcm_wr(d, I2S_IER, I2S_IER_IEN);
	pcm_wr(d, I2S_IRER, 1);
	pcm_wr(d, I2S_ITER, 1);
	pcm_wr(d, I2S_CER, 1);
}

/* =========================================================
 * Data path: RX period processing
 *
 * Called from the RX DMA completion callback (dma_rx_cb).
 *
 * Reads period_frames frames from RX period @period_idx and:
 *   - in bridge mode, builds the loopback image in bridge_period:
 *     each frame is read (with optional per-word byte reversal),
 *     odd/even byte-swapped, and written with the TX byte-order
 *     setting applied;
 *   - always pushes every frame (per-word byte reversal applied if
 *     enabled, but NOT odd/even swapped) into rx_ring for userspace.
 *
 * Finally samples I2S_ISR(0) and counts/clears FIFO overrun flags.
 * ========================================================= */
static void process_rx_period(struct pcm_dma8_dev *d, u32 period_idx)
{
	const u8 *rx_period;
	bool do_swap_rx, do_swap_tx, do_bridge;
	u32 f, frames, isr;

	if (pcm_stopping(d))
		return;

	rx_period = (const u8 *)d->rx_buf_cpu + period_idx * d->period_bytes;
	frames    = d->period_frames;

	/*
	 * Take one consistent snapshot of every runtime option used below.
	 * swap_tx_bytes is written under cfg_lock by the ioctl path, so it is
	 * read under the same lock as its siblings.
	 */
	spin_lock(&d->cfg_lock);
	do_swap_rx = d->swap_rx_bytes;
	do_swap_tx = d->swap_tx_bytes;
	do_bridge  = d->bridge_mode;
	spin_unlock(&d->cfg_lock);

	if (do_bridge) {
		unsigned long flags;
		u8 *bridge;

		spin_lock_irqsave(&d->bridge_lock, flags);
		bridge = d->bridge_period;
		if (bridge) {
			/*
			 * Never write past the bridge buffer: limit the frame
			 * count to what bridge_period_bytes can hold, mirroring
			 * the min() guard used when the buffer is copied to TX.
			 */
			size_t cap_frames = d->bridge_period_bytes / I2S_FRAME_BYTES;
			u32 bridge_frames = frames;

			if (cap_frames < bridge_frames)
				bridge_frames = (u32)cap_frames;

			memset(bridge, d->tx_silence, d->bridge_period_bytes);

			for (f = 0; f < bridge_frames; f++) {
				struct frame8 fr;

				/* 1. Fetch the frame from the DMA buffer */
				read_rx_frame(rx_period, f, &fr, do_swap_rx);

				/*
				 * 2. Odd/even byte swap (exchange adjacent channels):
				 *    b[0]<->b[1], b[2]<->b[3], b[4]<->b[5], b[6]<->b[7]
				 */
				frame_odd_even_swap(&fr);

				/* 3. Store into the bridge TX image */
				write_tx_frame(bridge, f, &fr, do_swap_tx);
			}
		}
		spin_unlock_irqrestore(&d->bridge_lock, flags);
		atomic64_inc(&d->bridge_periods);
	}

	/*
	 * Queue the frames for userspace. These carry the optional per-word
	 * byte reversal but are not odd/even swapped.
	 */
	for (f = 0; f < frames; f++) {
		struct frame8 fr;

		read_rx_frame(rx_period, f, &fr, do_swap_rx);
		ring_push(&d->rx_ring, &fr, &d->rx_overruns);
		atomic64_inc(&d->rx_frames);
	}

	/* Check FIFO overrun flags; reading ROR/TOR clears the condition */
	isr = pcm_rd(d, I2S_ISR(0));

	if (isr & I2S_ISR_RXFO) {
		atomic64_inc(&d->fifo_rxerr);
		(void)pcm_rd(d, I2S_ROR(0));
		if (debug >= 1)
			dev_warn_ratelimited(d->dev, "RX FIFO overrun!\n");
	}
	if (isr & I2S_ISR_TXFO) {
		atomic64_inc(&d->fifo_txerr);
		(void)pcm_rd(d, I2S_TOR(0));
	}
}

/* =========================================================
 * Data path: TX period fill
 *
 * Called from the TX DMA completion callback (dma_tx_cb).
 *
 * The period is first filled with the silence byte. Then:
 *   - bridge mode: the prepared bridge_period image is copied in
 *     (bounded by the smaller of the two buffer sizes);
 *   - userspace mode: frames are taken from tx_ring; every missing
 *     frame is sent as silence and counted as an underrun.
 * ========================================================= */
static void fill_tx_period(struct pcm_dma8_dev *d, u32 period_idx)
{
	u8 *tx_period;
	bool do_bridge, do_swap_tx;
	u32 f, frames;

	if (pcm_stopping(d))
		return;

	tx_period = (u8 *)d->tx_buf_cpu + period_idx * d->period_bytes;
	frames    = d->period_frames;

	/*
	 * Snapshot both runtime options together under cfg_lock; the ioctl
	 * path updates swap_tx_bytes under this lock as well.
	 */
	spin_lock(&d->cfg_lock);
	do_bridge  = d->bridge_mode;
	do_swap_tx = d->swap_tx_bytes;
	spin_unlock(&d->cfg_lock);

	memset(tx_period, d->tx_silence, d->period_bytes);

	if (do_bridge) {
		unsigned long flags;

		spin_lock_irqsave(&d->bridge_lock, flags);
		if (d->bridge_period)
			memcpy(tx_period, d->bridge_period,
			       min(d->period_bytes, d->bridge_period_bytes));
		spin_unlock_irqrestore(&d->bridge_lock, flags);

		atomic64_add(frames, &d->tx_frames);
		return;
	}

	/* Userspace TX mode */
	for (f = 0; f < frames; f++) {
		struct frame8 fr;

		/* Pre-fill with silence so an empty ring still yields a valid frame */
		memset(&fr, d->tx_silence, sizeof(fr));
		if (!ring_pop(&d->tx_ring, &fr))
			atomic64_inc(&d->tx_underruns);

		write_tx_frame(tx_period, f, &fr, do_swap_tx);
		atomic64_inc(&d->tx_frames);
	}
}

/* =========================================================
 * DMA callbacks
 *
 * Period index with cyclic DMA:
 *
 * Each callback is taken to mark the completion of exactly one
 * period, so with N periods (default N=2) the K-th callback
 * (1-based) corresponds to period (K-1) % N. The index comes from
 * an atomic per-direction sequence counter; the residue reported
 * by dmaengine_tx_status() is not consulted.
 *
 * NOTE(review): the original author states that residue is not
 * reliable in cyclic mode on the RP1 DMA controller and that these
 * callbacks run in softirq context — confirm both against the DMA
 * driver actually in use. If a callback is ever skipped or
 * coalesced, the counter-derived index drifts from the hardware
 * position.
 * ========================================================= */
static void dma_rx_cb(void *arg)
{
	struct pcm_dma8_dev *d = arg;
	u32 nth;

	if (pcm_stopping(d))
		return;

	/*
	 * nth is the zero-based ordinal of this callback; the period that
	 * just completed is nth modulo the number of periods
	 * (nth=0 → period 0, nth=1 → period 1, ...).
	 */
	nth = (u32)atomic_inc_return(&d->rx_cb_seq) - 1;

	process_rx_period(d, nth % d->periods);
}

/*
 * TX DMA period-complete callback.
 * The zero-based ordinal of this callback, modulo the number of periods,
 * selects the period to refill.
 */
static void dma_tx_cb(void *arg)
{
	struct pcm_dma8_dev *d = arg;
	u32 nth;

	if (pcm_stopping(d))
		return;

	nth = (u32)atomic_inc_return(&d->tx_cb_seq) - 1;

	fill_tx_period(d, nth % d->periods);
}

/* =========================================================
 * Stats work
 * ========================================================= */
/*
 * Periodic statistics logger (delayed work).
 *
 * Logs the current counter values and re-arms itself to run again in five
 * seconds, unless the device has started stopping.
 */
static void stats_fn(struct work_struct *w)
{
	struct delayed_work *dw = to_delayed_work(w);
	struct pcm_dma8_dev *d  = container_of(dw, struct pcm_dma8_dev, stats_work);
	u64 rx_f, tx_f;

	if (pcm_stopping(d))
		return;

	rx_f = atomic64_read(&d->rx_frames);
	tx_f = atomic64_read(&d->tx_frames);

	dev_info(d->dev,
		 "stats: rx_frames=%llu tx_frames=%llu rx_ovr=%lld tx_udr=%lld "
		 "fifo_rxerr=%lld fifo_txerr=%lld bridge_periods=%lld\n",
		 rx_f, tx_f,
		 atomic64_read(&d->rx_overruns),
		 atomic64_read(&d->tx_underruns),
		 atomic64_read(&d->fifo_rxerr),
		 atomic64_read(&d->fifo_txerr),
		 atomic64_read(&d->bridge_periods));

	/* Re-check: teardown may have begun while the message was being logged */
	if (!pcm_stopping(d))
		schedule_delayed_work(&d->stats_work, HZ * 5);
}

/* =========================================================
 * Character device file operations
 * ========================================================= */
/*
 * read(): copy whole frames from the RX ring to userspace.
 *
 * Only multiples of sizeof(struct frame8) are transferred. If the ring is
 * empty before anything was copied, the call blocks until a frame arrives
 * (or fails with -EAGAIN for O_NONBLOCK files); once at least one frame has
 * been copied it returns as soon as the ring runs dry. @ppos is not used.
 *
 * Returns the number of bytes copied, or -ENODEV while stopping, -EINVAL
 * when @len is smaller than one frame, -EAGAIN, -ERESTARTSYS when
 * interrupted, or -EFAULT when the first copy to userspace fails.
 */
static ssize_t pcm_read(struct file *file, char __user *ubuf,
			 size_t len, loff_t *ppos)
{
	struct pcm_dma8_dev *d = container_of(file->private_data,
					       struct pcm_dma8_dev, miscdev);
	struct frame8 fr;
	size_t copied = 0;

	if (pcm_stopping(d))
		return -ENODEV;
	if (len < sizeof(fr))
		return -EINVAL;

	while (len >= sizeof(fr)) {
		if (ring_pop(&d->rx_ring, &fr)) {
			if (copy_to_user(ubuf + copied, &fr, sizeof(fr)))
				return copied ? (ssize_t)copied : -EFAULT;
			copied += sizeof(fr);
			len    -= sizeof(fr);
			continue;
		}

		/* Ring is empty: return what we have, or wait for the first frame. */
		if (copied)
			break;
		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;
		if (wait_event_interruptible(d->rx_ring.q,
					     ring_count_safe(&d->rx_ring) > 0 ||
					     pcm_stopping(d)))
			return -ERESTARTSYS;
		if (pcm_stopping(d))
			return -ENODEV;
	}

	return (ssize_t)copied;
}

/*
 * write(): queue whole frames from userspace onto the TX ring.
 *
 * @len is rounded down to a multiple of sizeof(struct frame8); trailing
 * bytes are ignored. When the ring is full the call blocks until space is
 * available, unless the file is O_NONBLOCK. @ppos is not used.
 *
 * Returns the number of bytes queued. If nothing was queued, returns
 * -ENODEV while stopping, -EINVAL when @len holds no complete frame,
 * -EAGAIN, -ERESTARTSYS when interrupted, or -EFAULT on a bad user pointer.
 */
static ssize_t pcm_write(struct file *file, const char __user *ubuf,
			  size_t len, loff_t *ppos)
{
	struct pcm_dma8_dev *d = container_of(file->private_data,
					       struct pcm_dma8_dev, miscdev);
	const size_t frame_sz = sizeof(struct frame8);
	size_t total = len - (len % frame_sz);
	size_t done;
	struct frame8 fr;

	if (pcm_stopping(d))
		return -ENODEV;
	if (total == 0)
		return -EINVAL;

	for (done = 0; done < total; done += frame_sz) {
		if (copy_from_user(&fr, ubuf + done, frame_sz))
			return done ? (ssize_t)done : -EFAULT;

		/* Wait until the ring has a free slot */
		for (;;) {
			if (ring_count_safe(&d->tx_ring) < d->tx_ring.capacity)
				break;
			if (file->f_flags & O_NONBLOCK)
				return done ? (ssize_t)done : -EAGAIN;
			if (wait_event_interruptible(d->tx_ring.q,
						     ring_count_safe(&d->tx_ring) < d->tx_ring.capacity ||
						     pcm_stopping(d)))
				return done ? (ssize_t)done : -ERESTARTSYS;
			if (pcm_stopping(d))
				return done ? (ssize_t)done : -ENODEV;
		}

		ring_push(&d->tx_ring, &fr, NULL);
	}

	return (ssize_t)done;
}

/*
 * poll(): readable when the RX ring holds at least one frame, writable when
 * the TX ring has a free slot. Reports EPOLLERR | EPOLLHUP while stopping.
 */
static __poll_t pcm_poll(struct file *file, poll_table *wait)
{
	struct pcm_dma8_dev *d = container_of(file->private_data,
					       struct pcm_dma8_dev, miscdev);
	__poll_t events = 0;

	if (pcm_stopping(d))
		return EPOLLERR | EPOLLHUP;

	/* Register on both queues before sampling the ring state */
	poll_wait(file, &d->rx_ring.q, wait);
	poll_wait(file, &d->tx_ring.q, wait);

	if (ring_count_safe(&d->rx_ring) != 0)
		events |= EPOLLIN | EPOLLRDNORM;

	if (ring_count_safe(&d->tx_ring) < d->tx_ring.capacity)
		events |= EPOLLOUT | EPOLLWRNORM;

	return events;
}

static long pcm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct pcm_dma8_dev *d = container_of(file->private_data,
					       struct pcm_dma8_dev, miscdev);
	int val;
	struct pcm_stats_user stats;

	switch (cmd) {
	case PCM_IOCTL_SET_SWAP_RX:
		if (get_user(val, (int __user *)arg))
			return -EFAULT;
		spin_lock(&d->cfg_lock);
		d->swap_rx_bytes = !!val;
		spin_unlock(&d->cfg_lock);
		dev_info(d->dev, "swap_rx_bytes=%d\n", !!val);
		return 0;

	case PCM_IOCTL_SET_SWAP_TX:
		if (get_user(val, (int __user *)arg))
			return -EFAULT;
		spin_lock(&d->cfg_lock);
		d->swap_tx_bytes = !!val;
		spin_unlock(&d->cfg_lock);
		dev_info(d->dev, "swap_tx_bytes=%d\n", !!val);
		return 0;

	case PCM_IOCTL_SET_BRIDGE:
		if (get_user(val, (int __user *)arg))
			return -EFAULT;
		spin_lock(&d->cfg_lock);
		d->bridge_mode = !!val;
		spin_unlock(&d->cfg_lock);
		dev_info(d->dev, "bridge_mode=%d\n", !!val);
		return 0;

	case PCM_IOCTL_GET_STATS:
		memset(&stats, 0, sizeof(stats));
		stats.rx_frames     = (u64)atomic64_read(&d->rx_frames);
		stats.tx_frames     = (u64)atomic64_read(&d->tx_frames);
		stats.rx_overruns   = (u64)atomic64_read(&d->rx_overruns);
		stats.tx_underruns  = (u64)atomic64_read(&d->tx_underruns);
		stats.fifo_rxerr    = (u64)atomic64_read(&d->fifo_rxerr);
		stats.fifo_txerr    = (u64)atomic64_read(&d->fifo_txerr);
		stats.bridge_periods = (u64)atomic64_read(&d->bridge_periods);
		if (copy_to_user((void __user *)arg, &stats, sizeof(stats)))
			return -EFAULT;
		return 0;

	default:
		return -ENOTTY;
	}
}

/* File operations exposed through the misc character device. */
static const struct file_operations pcm_fops = {
	.owner          = THIS_MODULE,
	.llseek         = noop_llseek,   /* seeking is accepted but has no effect */
	.read           = pcm_read,      /* frames from the RX ring */
	.write          = pcm_write,     /* frames into the TX ring */
	.poll           = pcm_poll,
	.unlocked_ioctl = pcm_ioctl,     /* swap/bridge settings and statistics */
};

/* =========================================================
 * Platform driver probe
 * ========================================================= */
static int pcm_dma8_probe(struct platform_device *pdev)
{
	struct pcm_dma8_dev *d;
	struct resource *res;
	struct dma_slave_config rx_cfg = {0};
	struct dma_slave_config tx_cfg = {0};
	dma_addr_t rx_fifo_phys, tx_fifo_phys;
	resource_size_t rp1_off;
	u32 p;
	int ret;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	/*
	 * Filter theo i2s_index:
	 *   I2S0 = 0xa0000, I2S1 = 0xa4000, I2S2 = 0xa8000
	 */
	rp1_off = res->start & 0xfffff;
	if ((i2s_index == 0 && rp1_off != 0xa0000) ||
	    (i2s_index == 1 && rp1_off != 0xa4000) ||
	    (i2s_index == 2 && rp1_off != 0xa8000))
		return -ENODEV;

	d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->dev = &pdev->dev;
	atomic_set(&d->stopping, 0);

	spin_lock_init(&d->cfg_lock);
	spin_lock_init(&d->bridge_lock);
	spin_lock_init(&d->stats_lock);

	/* Runtime config từ module params */
	d->swap_rx_bytes = swap_rx;
	d->swap_tx_bytes = swap_tx;
	d->bridge_mode   = bridge_mode;
	d->tx_silence    = (u8)(tx_silence & 0xff);

	/* DMA mask */
	if (!pdev->dev.dma_mask)
		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
	if (ret) {
		dev_err(&pdev->dev, "dma_set_mask_and_coherent: %d\n", ret);
		return ret;
	}

	/* Period/buffer sizing */
	d->period_frames = (period_frames > 0) ? period_frames
					       : PCM_FRAMES_PER_PERIOD;
	d->periods       = (periods > 1) ? periods : DEFAULT_PERIODS;
	d->period_bytes  = (size_t)d->period_frames * I2S_FRAME_BYTES;

	if (!d->period_bytes || (d->period_bytes % I2S_FRAME_BYTES)) {
		dev_err(&pdev->dev, "bad period_bytes=%zu\n", d->period_bytes);
		return -EINVAL;
	}

	d->rx_buf_bytes = d->period_bytes * d->periods;
	d->tx_buf_bytes = d->period_bytes * d->periods;

	dev_info(&pdev->dev,
		 "config: i2s%d %s rate=%d bclk=%lu fifo_th=%d "
		 "frames/period=%u periods=%u period_bytes=%zu swap_rx=%d swap_tx=%d bridge=%d\n",
		 i2s_index, i2s_master ? "master" : "slave", sample_rate,
		 (unsigned long)sample_rate * 64, fifo_th,
		 d->period_frames, d->periods, d->period_bytes,
		 d->swap_rx_bytes, d->swap_tx_bytes, d->bridge_mode);

	/* Ring buffers */
	ret = ring_init(&pdev->dev, &d->rx_ring, RX_RING_FRAMES);
	if (ret)
		return ret;
	ret = ring_init(&pdev->dev, &d->tx_ring, TX_RING_FRAMES);
	if (ret)
		return ret;

	/* Bridge buffer */
	d->bridge_period_bytes = d->period_bytes;
	d->bridge_period = devm_kzalloc(&pdev->dev,
					 d->bridge_period_bytes, GFP_KERNEL);
	if (!d->bridge_period)
		return -ENOMEM;
	memset(d->bridge_period, d->tx_silence, d->bridge_period_bytes);

	/* Stats work */
	INIT_DELAYED_WORK(&d->stats_work, stats_fn);
	d->stats_last_ns = ktime_get_ns();

	atomic_set(&d->rx_cb_seq, 0);
	atomic_set(&d->tx_cb_seq, 0);

	/* ioremap I2S registers */
	d->base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(d->base)) {
		ret = PTR_ERR(d->base);
		dev_err(&pdev->dev, "ioremap I2S: %d\n", ret);
		return ret;
	}

	/* ioremap RP1 GPIO bank 0 */
	{
		resource_size_t rp1_base  = res->start - rp1_off;
		resource_size_t gpio_phys = rp1_base + RP1_IO_BANK0_OFFSET;

		d->gpio_base = devm_ioremap(&pdev->dev, gpio_phys,
					    RP1_IO_BANK0_SIZE);
		if (!d->gpio_base) {
			if (invert_bclk || invert_fsync) {
				dev_err(&pdev->dev, "ioremap GPIO bank0 failed\n");
				return -ENOMEM;
			}
			dev_warn(&pdev->dev, "GPIO bank0 not mapped (inversion disabled)\n");
		}
	}

	/* Clock */
	d->clk = devm_clk_get(&pdev->dev, "i2sclk");
	if (IS_ERR(d->clk))
		return dev_err_probe(&pdev->dev, PTR_ERR(d->clk),
				     "failed to get i2sclk\n");

	/* DMA channels */
	d->rx_chan = dma_request_chan(&pdev->dev, "rx");
	if (IS_ERR(d->rx_chan)) {
		ret = PTR_ERR(d->rx_chan);
		dev_err(&pdev->dev, "dma_request_chan(rx): %d\n", ret);
		return ret;
	}

	d->tx_chan = dma_request_chan(&pdev->dev, "tx");
	if (IS_ERR(d->tx_chan)) {
		ret = PTR_ERR(d->tx_chan);
		dev_err(&pdev->dev, "dma_request_chan(tx): %d\n", ret);
		goto err_rel_rx;
	}

	d->dma_dev = d->rx_chan->device->dev;
	if (!d->dma_dev) {
		ret = -ENODEV;
		dev_err(&pdev->dev, "no dma_dev\n");
		goto err_rel_tx;
	}

	/* Terminate any stale DMA from previous load */
	dmaengine_terminate_sync(d->rx_chan);
	dmaengine_terminate_sync(d->tx_chan);
	msleep(10);

	/* DMA coherent buffers */
	d->rx_buf_cpu = dma_alloc_coherent(d->dma_dev, d->rx_buf_bytes,
					   &d->rx_buf_dma, GFP_KERNEL);
	if (!d->rx_buf_cpu) {
		ret = -ENOMEM;
		dev_err(&pdev->dev, "dma_alloc RX %zu bytes failed\n",
			d->rx_buf_bytes);
		goto err_rel_tx;
	}

	d->tx_buf_cpu = dma_alloc_coherent(d->dma_dev, d->tx_buf_bytes,
					   &d->tx_buf_dma, GFP_KERNEL);
	if (!d->tx_buf_cpu) {
		ret = -ENOMEM;
		dev_err(&pdev->dev, "dma_alloc TX %zu bytes failed\n",
			d->tx_buf_bytes);
		goto err_free_rx;
	}

	memset(d->rx_buf_cpu, 0, d->rx_buf_bytes);
	memset(d->tx_buf_cpu, d->tx_silence, d->tx_buf_bytes);

	/* Pre-fill TX silence */
	for (p = 0; p < d->periods; p++)
		fill_tx_period(d, p);

	/* GPIO inversion */
	rp1_apply_inversion(d);

	/* Enable clock */
	ret = clk_prepare_enable(d->clk);
	if (ret) {
		dev_err(&pdev->dev, "clk_prepare_enable: %d\n", ret);
		goto err_restore_gpio;
	}

	dev_info(&pdev->dev, "clk rate: %lu Hz\n", clk_get_rate(d->clk));

	/* Configure I2S hardware */
	ret = pcm_hw_config(d);
	if (ret) {
		dev_err(&pdev->dev, "pcm_hw_config: %d\n", ret);
		goto err_disable_clk;
	}

	/*
	 * DMA FIFO physical addresses:
	 *
	 * RX data FIFO = I2S_LRBR_LTHR(0) = 0x020 (Left Receive Buffer Register)
	 * TX data FIFO = I2S_LRBR_LTHR(0) = 0x020 (Left Transmit Holding Register)
	 *
	 * Đây là địa chỉ DW-I2S FIFO data port. DMA đọc/ghi 32-bit (left word)
	 * rồi ngay sau đó đọc/ghi RRBR_RTHR(0)=0x024 (right word) → 8 byte/cycle.
	 *
	 * LƯU Ý: I2S_RXDMA (0x1c0) và I2S_TXDMA (0x1c8) là CONTROL registers,
	 * KHÔNG phải data FIFO. Đây là bug trong code gốc.
	 */
	rx_fifo_phys = res->start + I2S_LRBR_LTHR(0);
	tx_fifo_phys = res->start + I2S_LRBR_LTHR(0);

	dev_info(&pdev->dev, "DMA ports: rx_fifo=%pad tx_fifo=%pad\n",
		 &rx_fifo_phys, &tx_fifo_phys);

	/* Configure DMA slave */
	rx_cfg.direction        = DMA_DEV_TO_MEM;
	rx_cfg.src_addr         = rx_fifo_phys;
	rx_cfg.src_addr_width   = DMA_SLAVE_BUSWIDTH_4_BYTES;
	rx_cfg.src_maxburst     = fifo_th;  /* burst = FIFO threshold */

	ret = dmaengine_slave_config(d->rx_chan, &rx_cfg);
	if (ret) {
		dev_err(&pdev->dev, "slave_config(rx): %d\n", ret);
		goto err_stop_hw;
	}

	tx_cfg.direction        = DMA_MEM_TO_DEV;
	tx_cfg.dst_addr         = tx_fifo_phys;
	tx_cfg.dst_addr_width   = DMA_SLAVE_BUSWIDTH_4_BYTES;
	tx_cfg.dst_maxburst     = fifo_th;
	tx_cfg.src_maxburst     = fifo_th;

	ret = dmaengine_slave_config(d->tx_chan, &tx_cfg);
	if (ret) {
		dev_err(&pdev->dev, "slave_config(tx): %d\n", ret);
		goto err_stop_hw;
	}

	/* Prepare cyclic DMA descriptors */
	d->rx_desc = dmaengine_prep_dma_cyclic(
		d->rx_chan, d->rx_buf_dma, d->rx_buf_bytes, d->period_bytes,
		DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!d->rx_desc) {
		ret = -ENOMEM;
		dev_err(&pdev->dev, "prep_dma_cyclic(rx) failed\n");
		goto err_stop_hw;
	}

	d->tx_desc = dmaengine_prep_dma_cyclic(
		d->tx_chan, d->tx_buf_dma, d->tx_buf_bytes, d->period_bytes,
		DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!d->tx_desc) {
		ret = -ENOMEM;
		dev_err(&pdev->dev, "prep_dma_cyclic(tx) failed\n");
		goto err_stop_hw;
	}

	d->rx_desc->callback       = dma_rx_cb;
	d->rx_desc->callback_param = d;
	d->tx_desc->callback       = dma_tx_cb;
	d->tx_desc->callback_param = d;

	d->rx_cookie = dmaengine_submit(d->rx_desc);
	if (dma_submit_error(d->rx_cookie)) {
		ret = -EIO;
		dev_err(&pdev->dev, "dmaengine_submit(rx) failed\n");
		goto err_stop_hw;
	}

	d->tx_cookie = dmaengine_submit(d->tx_desc);
	if (dma_submit_error(d->tx_cookie)) {
		ret = -EIO;
		dev_err(&pdev->dev, "dmaengine_submit(tx) failed\n");
		goto err_term_rx;
	}

	if (start_pcm) {
		dma_async_issue_pending(d->rx_chan);
		dma_async_issue_pending(d->tx_chan);
		pcm_hw_start(d);
		dev_info(&pdev->dev, "I2S/DMA started\n");
	}

	/* Register misc device */
	d->miscdev.minor = MISC_DYNAMIC_MINOR;
	d->miscdev.name  = "si_pcm_dma8";
	d->miscdev.fops  = &pcm_fops;

	ret = misc_register(&d->miscdev);
	if (ret) {
		dev_err(&pdev->dev, "misc_register: %d\n", ret);
		goto err_term_both;
	}

	platform_set_drvdata(pdev, d);
	schedule_delayed_work(&d->stats_work, HZ * 5);

	dev_info(&pdev->dev,
		 "loaded: /dev/si_pcm_dma8 ready\n"
		 "  model: 8-slot @ %d Hz, BCLK=%lu Hz, period=%zu bytes, buf=%zu bytes\n"
		 "  bridge=%d swap_rx=%d swap_tx=%d silence=0x%02x\n",
		 sample_rate, (unsigned long)sample_rate * 64,
		 d->period_bytes, d->rx_buf_bytes,
		 d->bridge_mode, d->swap_rx_bytes, d->swap_tx_bytes, d->tx_silence);

	return 0;

err_term_both:
	atomic_set(&d->stopping, 1);
	dmaengine_terminate_sync(d->tx_chan);
err_term_rx:
	atomic_set(&d->stopping, 1);
	dmaengine_terminate_sync(d->rx_chan);
err_stop_hw:
	atomic_set(&d->stopping, 1);
	pcm_hw_stop(d);
err_disable_clk:
	clk_disable_unprepare(d->clk);
err_restore_gpio:
	rp1_restore_inversion(d);
	if (d->tx_buf_cpu)
		dma_free_coherent(d->dma_dev, d->tx_buf_bytes,
				  d->tx_buf_cpu, d->tx_buf_dma);
err_free_rx:
	if (d->rx_buf_cpu)
		dma_free_coherent(d->dma_dev, d->rx_buf_bytes,
				  d->rx_buf_cpu, d->rx_buf_dma);
err_rel_tx:
	dma_release_channel(d->tx_chan);
err_rel_rx:
	dma_release_channel(d->rx_chan);
	return ret;
}

static void pcm_dma8_remove(struct platform_device *pdev)
{
	struct pcm_dma8_dev *d = platform_get_drvdata(pdev);

	if (!d)
		return;

	dev_info(&pdev->dev, "removing...\n");

	atomic_set(&d->stopping, 1);

	wake_up_interruptible(&d->rx_ring.q);
	wake_up_interruptible(&d->tx_ring.q);

	cancel_delayed_work_sync(&d->stats_work);
	misc_deregister(&d->miscdev);

	dmaengine_terminate_sync(d->tx_chan);
	dmaengine_terminate_sync(d->rx_chan);

	pcm_hw_stop(d);
	rp1_restore_inversion(d);
	clk_disable_unprepare(d->clk);

	dma_free_coherent(d->dma_dev, d->tx_buf_bytes,
			  d->tx_buf_cpu, d->tx_buf_dma);
	dma_free_coherent(d->dma_dev, d->rx_buf_bytes,
			  d->rx_buf_cpu, d->rx_buf_dma);

	dma_release_channel(d->tx_chan);
	dma_release_channel(d->rx_chan);

	dev_info(&pdev->dev,
		 "removed: rx=%lld tx=%lld ovr=%lld udr=%lld rxerr=%lld txerr=%lld\n",
		 atomic64_read(&d->rx_frames),
		 atomic64_read(&d->tx_frames),
		 atomic64_read(&d->rx_overruns),
		 atomic64_read(&d->tx_underruns),
		 atomic64_read(&d->fifo_rxerr),
		 atomic64_read(&d->fifo_txerr));
}

/* =========================================================
 * Module registration
 * ========================================================= */
/*
 * Device-tree match table. The compatible string is the generic DesignWare
 * I2S one; pcm_dma8_probe() then narrows the match to a single instance.
 */
static const struct of_device_id pcm_dma8_of_match[] = {
	{
		.compatible = "snps,designware-i2s",
	},
	{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, pcm_dma8_of_match);

/* Platform driver glue: probe/remove callbacks plus the OF match table. */
static struct platform_driver pcm_dma8_driver = {
	.driver = {
		.of_match_table = pcm_dma8_of_match,
		.name           = DRV_NAME,
	},
	.remove = pcm_dma8_remove,
	.probe  = pcm_dma8_probe,
};

/* Registers the driver on module load and unregisters it on unload. */
module_platform_driver(pcm_dma8_driver);

/* =========================================================
 * Module parameters
 * ========================================================= */
/*
 * All parameters are root read/write (0600) in sysfs.
 *
 * swap_rx, swap_tx, bridge_mode, tx_silence, period_frames and periods are
 * copied into the per-device state in pcm_dma8_probe(), so writing them
 * through sysfs afterwards does not change an already-probed device.
 */
module_param(i2s_index,     int,  0600);
MODULE_PARM_DESC(i2s_index, "RP1 I2S index: 0,1,2 (default 1=0xa4000)");

module_param(i2s_master,    int,  0600);
MODULE_PARM_DESC(i2s_master, "0=slave(external BCLK/WS), 1=master");

module_param(sample_rate,   int,  0600);
MODULE_PARM_DESC(sample_rate, "Frame rate Hz (default 8000). BCLK = rate*64.");

module_param(fifo_th,       int,  0600);
MODULE_PARM_DESC(fifo_th, "DW-I2S FIFO threshold 1..16 (default 8)");

module_param(period_frames, int,  0600);
MODULE_PARM_DESC(period_frames, "Frames per DMA period (default 32 = 4ms @ 8kHz)");

module_param(periods,       int,  0600);
MODULE_PARM_DESC(periods, "Number of DMA periods (default 2 = double buffer)");

module_param(tx_silence,    int,  0600);
MODULE_PARM_DESC(tx_silence, "Silence byte for inactive TX slots (default 0xD5)");

/*
 * The swap is an odd/even byte swap (b0<->b1, b2<->b3, ...), not a 32-bit
 * endian reversal; the descriptions below say so explicitly.
 */
module_param(swap_rx,       bool, 0600);
MODULE_PARM_DESC(swap_rx, "Swap odd/even bytes of RX data (b0<->b1, b2<->b3, ...)");

module_param(swap_tx,       bool, 0600);
MODULE_PARM_DESC(swap_tx, "Swap odd/even bytes of TX data (b0<->b1, b2<->b3, ...)");

module_param(bridge_mode,   bool, 0600);
MODULE_PARM_DESC(bridge_mode, "1=loopback RX→TX with odd/even byte swap (default 1)");

module_param(invert_bclk,   bool, 0600);
MODULE_PARM_DESC(invert_bclk, "Invert BCLK at GPIO18 via RP1 INOVER/OUTOVER");

module_param(invert_fsync,  bool, 0600);
MODULE_PARM_DESC(invert_fsync, "Invert FSYNC/WS at GPIO19 via RP1 INOVER/OUTOVER");

module_param(debug,         int,  0600);
MODULE_PARM_DESC(debug, "Debug level 0/1/2");

module_param(start_pcm,     bool, 0600);
MODULE_PARM_DESC(start_pcm, "Start I2S/DMA on probe (default true)");

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fixed version");
MODULE_DESCRIPTION("RPi5/RP1 DW-I2S 8-slot PCM DMA driver — 8 kênh thoại với odd/even swap");
← Back to timeline