% Student paper: evaluation of design metrics for crossbar-based in-memory DFT computation.
@misc{9224551,
  author       = {Nielsen, Philip and Tatidis, Sofia},
  title        = {Evaluation of Design Metrics for {DFT} Implemented Using In-Memory Computing},
  year         = {2026},
  note         = {Student Paper},
  language     = {eng},
  abstract     = {The discrete Fourier transform (DFT) is one of the most important algorithms
commonly used in baseband processing. While traditional hardware implementations
like the Fast Fourier Transform (FFT) are highly optimized, they heavily suffer
from the von Neumann bottleneck. To overcome this, performing the computations
directly in memory using memristive crossbar arrays is a promising solution.
This thesis evaluates different simulators for analog in-memory computing and
finds NeuroSim to be the most suitable for simulating power, performance, area
and accuracy. Modifications to the simulator are proposed to support the
assessment of three different crossbar-based designs for computing the DFT. To
investigate whether the DFT can be reliably implemented in the analog domain
without severe accuracy degradation, a ferroelectric tunnel junction memristor
was chosen due to its high resistance and inherent robustness against
non-idealities like IR drop. We demonstrate that a symmetry design, which stacks
the twiddle coefficients into one crossbar and leverages the conjugate symmetry
of the DFT, is optimal for real-valued inputs. This design effectively reduces
the hardware cost of the peripherals, reducing the crossbar area by 50\% compared
to a naive implementation.
For a small 64-point DFT, our evaluations show that the system can achieve a
mean square error on the order of $10^{-3}$. However, scaling to a large
1024-point DFT in a single crossbar introduces significant IR drop, resulting in
an increased accuracy degradation. To mitigate this, a tiled architecture is
adopted. As tiling significantly increases energy and area due to the overhead
of multiple analog-to-digital converters, a tradeoff analysis is performed.
Square tiles of size $T = 1024$ are found to be optimal, effectively reducing the
error margin to $2 \times 10^{-2}$ while maintaining low energy consumption and
latency. Finally, the results show that a single crossbar implementation
consumes approximately 8.65 nJ, about half the energy of highly optimized CMOS
FFTs, whereas the tiled architecture requires approximately 15.6 nJ.},
}

