Student paper record. Kept as a misc-type entry because the exported data
carries no school/institution field, which a thesis entry type would require.
NOTE(review): the abstract contains UTF-8 characters; if this file is processed
with classic 8-bit BibTeX rather than Biber, convert them to LaTeX escapes.

@misc{9224035,
  abstract     = {QR decomposition (QRD) is a computationally intensive matrix factorization method which is widely used in signal processing. Meeting the stringent processing budgets of real-time applications necessitates dedicated hardware acceleration. This thesis presents the architecture and Register-Transfer-Level (RTL) implementation of a QRD accelerator based on Givens rotations, specifically designed to achieve an end-to-end latency of ≤ 50 μs for an 8 × 8 complex covariance matrix. The architecture transforms the complex input into a 16 × 16 realified representation, which is processed by a CORDIC-based datapath to compute both the Q and R matrices. To manage data dependencies, the design employs a stage-wise binary-tree elimination schedule enforced by a hard memory-visibility barrier.

While the compute core is fully pipelined with a fixed latency of 16 cycles, the system throughput is governed by the on-chip memory service model, resulting in a sustained initiation interval of 2.

Functional correctness is established via bit-exact verification against a golden fixed-point C reference model using Q1.15 arithmetic. The design was implemented and validated on a Zynq™ UltraScale+™ Field Programmable Gate Array (FPGA) development board. Operating at 245.76 MHz, the accelerator completes a single QRD in 9.83 μs, satisfying the target requirement with a significant performance margin and achieving a sustained throughput of approximately 101 kQRD/s. The results indicate that end-to-end latency is primarily dominated by system-level data movement and synchronization barriers rather than raw arithmetic computation. These findings motivate future research into relaxed consistency models and inter-stage data forwarding to further optimize scaling for higher-dimensional matrices. The novel architecture and scheduling methodology developed in this work have been filed for patent protection.},
  author       = {Singh, Vinay},
  language     = {eng},
  note         = {Student Paper},
  title        = {A {QR} Decomposition Accelerator for Digital Beamforming},
  year         = {2026},
}

