{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T08:30:02Z","timestamp":1770280202586,"version":"3.49.0"},"reference-count":49,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,10]]},"DOI":"10.1109\/icfpt64416.2024.11113391","type":"proceedings-article","created":{"date-parts":[[2025,8,18]],"date-time":"2025-08-18T19:38:13Z","timestamp":1755545893000},"page":"01-10","source":"Crossref","is-referenced-by-count":1,"title":["FINN-T: Compiling Custom Dataflow Accelerators for Quantized Transformers"],"prefix":"10.1109","author":[{"given":"Christoph","family":"Berganski","sequence":"first","affiliation":[{"name":"Paderborn University,Germany"}]},{"given":"Felix","family":"Jentzsch","sequence":"additional","affiliation":[{"name":"Paderborn University,Germany"}]},{"given":"Marco","family":"Platzner","sequence":"additional","affiliation":[{"name":"Paderborn University,Germany"}]},{"given":"Max","family":"Kuhmichel","sequence":"additional","affiliation":[{"name":"South Westphalia University of Applied Sciences,Germany"}]},{"given":"Heiner","family":"Giefers","sequence":"additional","affiliation":[{"name":"South Westphalia University of Applied Sciences,Germany"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","author":"Radford","year":"2018","journal-title":"Improving language understanding by generative pre-training"},{"issue":"8","key":"ref3","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref4","first-page":"4171","article-title":"BERT: Pretraining of deep bidirectional transformers for language understanding","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin"},{"key":"ref5","article-title":"An image is worth 16 times 16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2021","journal-title":"ICLR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-3015"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/2384\/1\/012017"},{"key":"ref8","first-page":"18281","article-title":"Particle transformer for jet tagging","volume-title":"Proceedings of the 39th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"162","author":"Qu","year":"2022"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021744"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3242897"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/FPL60245.2023.00012"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/FPL57034.2022.00015"},{"key":"ref13","article-title":"Vaqf: Fully automatic software-hardware co-design framework for low-bit vision transformer","author":"Sun","year":"2022","journal-title":"arXiv preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/FPL60245.2023.00015"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/FPL57034.2022.00027"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/FPL60245.2023.00048"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00051"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3370748.3406567"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530585"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2022.3197489"},{"key":"ref21","article-title":"Ultra fast transformers on fpgas for particle physics experiments","author":"Jiang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICFPT56656.2022.9974463"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW59300.2023.00029"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1088\/1748-0221\/13\/07\/P07027"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643586"},{"key":"ref26","article-title":"Layer normalization","author":"Ba","year":"2016","journal-title":"arXiv preprint"},{"key":"ref27","article-title":"Gaussian error linear units (gelus)","author":"Hendrycks","year":"2016","journal-title":"arXiv preprint"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-021-94691-7"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ISLPED58423.2023.10244348"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3390\/technologies8030046"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASID.2018.8693206"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICSICT.2018.8565706"},{"key":"ref33","article-title":"Efficient softmax approximation for deep neural networks with attention mechanism","author":"Vasyltsov","year":"2021","journal-title":"arXiv preprint"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/APCCAS.2018.8605654"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/AICAS.2019.8771616"},{"key":"ref36","volume-title":"Xilinx\/brevitas","author":"Pappalardo","year":"2023"},{"key":"ref37","article-title":"Streamlined deployment for quantized neural networks","author":"Umuroglu","year":"2017","journal-title":"arXiv preprint"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3547141"},{"key":"ref39","article-title":"QONNX: Representing Arbitrary-Precision Quantized Neural Networks","volume-title":"4th Workshop on Accelerated Machine Learning (AccML) at HiPEAC 2022 Conference","author":"Pappalardo","year":"2022"},{"key":"ref40","volume-title":"fastmachinelearning\/qonnx","author":"Umuroglu","year":"2022"},{"key":"ref41","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","volume-title":"International conference on machine learning. pmlr","author":"Ioffe","year":"2015"},{"key":"ref42","first-page":"37617","article-title":"Understanding the failure of batch normalization for transformers in nlp","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Wang","year":"2022"},{"key":"ref43","article-title":"OPTQ: Accurate quantization for generative pre-trained transformers","volume-title":"The Eleventh International Conference on Learning Representations","author":"Frantar","year":"2023"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1406.3269"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2022.3202091"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2018.2797022"},{"key":"ref47","author":"Xu","year":"2024","journal-title":"A survey on knowledge distillation of large language models"},{"key":"ref48","author":"Eldan","year":"2023","journal-title":"Tinystories: How small can language models be and still speak coherent english?"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3530811"}],"event":{"name":"2024 International Conference on Field Programmable Technology (ICFPT)","location":"Sydney, Australia","start":{"date-parts":[[2024,12,10]]},"end":{"date-parts":[[2024,12,12]]}},"container-title":["2024 International Conference on Field Programmable Technology (ICFPT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11113383\/11113387\/11113391.pdf?arnumber=11113391","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T04:42:15Z","timestamp":1755578535000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11113391\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,10]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/icfpt64416.2024.11113391","relation":{},"subject":[],"published":{"date-parts":[[2024,12,10]]}}}