{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T17:09:03Z","timestamp":1773248943662,"version":"3.50.1"},"reference-count":52,"publisher":"Association for Computing Machinery (ACM)","issue":"1","license":[{"start":{"date-parts":[[2024,1,27]],"date-time":"2024-01-27T00:00:00Z","timestamp":1706313600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100023581","name":"National Science Foundation Graduate Research Fellowship Program","doi-asserted-by":"crossref","award":["DGE-2038238"],"award-info":[{"award-number":["DGE-2038238"]}],"id":[{"id":"10.13039\/100023581","id-type":"DOI","asserted-by":"crossref"}]},{"name":"National Science Foundation","award":["2117997"],"award-info":[{"award-number":["2117997"]}]},{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"crossref","id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Office of Science, Office of Advanced Scientific Computing Research","award":["DE-FOA-0002501"],"award-info":[{"award-number":["DE-FOA-0002501"]}]},{"name":"DOE Office of Science, Office of High Energy Physics Early Career Research Program","award":["DE-SC0021187"],"award-info":[{"award-number":["DE-SC0021187"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["ACM Trans. Reconfigurable Technol. Syst."],"published-print":{"date-parts":[[2024,3,31]]},"abstract":"<jats:p>\n            Deep neural networks use skip connections to improve training convergence. However, these skip connections are costly in hardware, requiring extra buffers and increasing on- and off-chip memory utilization and bandwidth requirements. In this article, we show that skip connections can be optimized for hardware when tackled with a hardware-software codesign approach. We argue that while a network\u2019s skip connections are needed for the network to learn, they can later be removed or shortened to provide a more hardware-efficient implementation with minimal to no accuracy loss. We introduce\n            <jats:sc>Tailor<\/jats:sc>\n            , a codesign tool whose hardware-aware training algorithm gradually removes or shortens a fully trained network\u2019s skip connections to lower the hardware cost.\n            <jats:sc>Tailor<\/jats:sc>\n            improves resource utilization by up to 34% for block random access memories (BRAMs), 13% for flip-flops (FFs), and 16% for look-up tables (LUTs) for on-chip, dataflow-style architectures.\n            <jats:sc>Tailor<\/jats:sc>\n            increases performance by 30% and reduces memory bandwidth by 45% for a two-dimensional processing element array architecture.\n          <\/jats:p>","DOI":"10.1145\/3624990","type":"journal-article","created":{"date-parts":[[2023,9,22]],"date-time":"2023-09-22T12:51:32Z","timestamp":1695387092000},"page":"1-23","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["<scp>Tailor<\/scp>\n            : Altering Skip Connections for Resource-Efficient Inference"],"prefix":"10.1145","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1213-421X","authenticated-orcid":false,"given":"Olivia","family":"Weng","sequence":"first","affiliation":[{"name":"University of California San Diego, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4804-7305","authenticated-orcid":false,"given":"Gabriel","family":"Marcano","sequence":"additional","affiliation":[{"name":"University of California San Diego, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3651-0232","authenticated-orcid":false,"given":"Vladimir","family":"Loncar","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8811-2258","authenticated-orcid":false,"given":"Alireza","family":"Khodamoradi","sequence":"additional","affiliation":[{"name":"AMD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9768-5349","authenticated-orcid":false,"given":"Abarajithan","family":"G","sequence":"additional","affiliation":[{"name":"University of California San Diego, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4329-0197","authenticated-orcid":false,"given":"Nojan","family":"Sheybani","sequence":"additional","affiliation":[{"name":"University of California San Diego, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4283-0833","authenticated-orcid":false,"given":"Andres","family":"Meza","sequence":"additional","affiliation":[{"name":"University of California San Diego, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0798-3794","authenticated-orcid":false,"given":"Farinaz","family":"Koushanfar","sequence":"additional","affiliation":[{"name":"University of California San Diego, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2087-865X","authenticated-orcid":false,"given":"Kristof","family":"Denolf","sequence":"additional","affiliation":[{"name":"AMD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5076-7096","authenticated-orcid":false,"given":"Javier Mauricio","family":"Duarte","sequence":"additional","affiliation":[{"name":"University of California San Diego, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9062-5570","authenticated-orcid":false,"given":"Ryan","family":"Kastner","sequence":"additional","affiliation":[{"name":"University of California San Diego, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,1,27]]},"reference":[{"key":"e_1_3_2_2_2","unstructured":"2023. Tailor. https:\/\/github.com\/oliviaweng\/tailor"},{"key":"e_1_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/ac0ea1"},{"key":"e_1_3_2_4_2","article-title":"MLPerf tiny benchmark","author":"Banbury Colby","year":"2021","unstructured":"Colby Banbury, Vijay Janapa Reddi, Peter Torelli, Jeremy Holleman, Nat Jeffries, Csaba Kiraly, Pietro Montino, David Kanter, Sebastian Ahmed, Danilo Pau, et\u00a0al. 2021. MLPerf tiny benchmark. arXiv preprint arXiv:2106.07597 (2021).","journal-title":"arXiv preprint arXiv:2106.07597"},{"key":"e_1_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/72.279181"},{"key":"e_1_3_2_6_2","volume-title":"Workshop on Benchmarking Machine Learning Workloads on Emerging Hardware (MLBench)","author":"Borras Hendrik","year":"2022","unstructured":"Hendrik Borras et\u00a0al. 2022. Open-source FPGA-ML codesign for the MLPerf tiny benchmark. In Workshop on Benchmarking Machine Learning Workloads on Emerging Hardware (MLBench). arXiv:2206.11791 [cs.LG]."},{"key":"e_1_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00027"},{"key":"e_1_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01352"},{"key":"e_1_3_2_10_2","unstructured":"Zhen Dong Zhewei Yao Daiyaan Arfeen Amir Gholami Michael W. Mahoney and Kurt Keutzer. 2020. HAWQ-V2: Hessian aware trace-weighted quantization of neural networks. 33 (2020) 18518. arXiv:1911.03852. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/d77c703536718b95308130ff2e5cf9ee-Paper.pdf"},{"key":"e_1_3_2_11_2","first-page":"293","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"Dong Zhen","year":"2019","unstructured":"Zhen Dong, Zhewei Yao, Amir Gholami, Michael W. Mahoney, and Kurt Keutzer. 2019. HAWQ: Hessian aware quantization of neural networks with mixed-precision. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 293. arXiv:1905.03696"},{"key":"e_1_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1088\/1748-0221\/13\/07\/P07027"},{"key":"e_1_3_2_13_2","volume-title":"1st TinyML Research Symposium","author":"Fahim Farah","year":"2021","unstructured":"Farah Fahim et\u00a0al. 2021. hls4ml: An open-source codesign workflow to empower scientific low-power machine learning devices. In 1st TinyML Research Symposium. arxiv:2103.05579 [cs.LG]."},{"key":"e_1_3_2_14_2","volume-title":"Deep Learning with INT8 Optimization on Xilinx Devices","author":"Fu Yao","year":"2017","unstructured":"Yao Fu, Ephrem Wu, Ashish Sirasao, Sedny Attia, Kamran Khan, and Ralph Wittig. 2017. Deep Learning with INT8 Optimization on Xilinx Devices. White Paper WP486. https:\/\/docs.xilinx.com\/v\/u\/en-US\/wp486-deep-learning-int8"},{"key":"e_1_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Amir Gholami Sehoon Kim Zhen Dong Zhewei Yao Michael W. Mahoney and Kurt Keutzer. 2021. A survey of quantization methods for efficient neural network inference. (2021). arXiv:2103.13630","DOI":"10.1201\/9781003162810-13"},{"key":"e_1_3_2_16_2","first-page":"249","volume-title":"Proceedings of the 13th International Conference on Artificial Intelligence and Statistics","volume":"9","author":"Glorot Xavier","year":"2010","unstructured":"Xavier Glorot and Yoshua Bengio. 2010. Understanding the difficulty of training deep feedforward neural networks. In Proceedings of the 13th International Conference on Artificial Intelligence and Statistics, Yee Whye Teh and Mike Titterington (Eds.), Vol. 9. 249. https:\/\/proceedings.mlr.press\/v9\/glorot10a.html"},{"key":"e_1_3_2_17_2","first-page":"315","volume-title":"Proceedings of the 14th International Conference on Artificial Intelligence and Statistics","volume":"15","author":"Glorot Xavier","year":"2011","unstructured":"Xavier Glorot, Antoine Bordes, and Yoshua Bengio. 2011. Deep sparse rectifier neural networks. In Proceedings of the 14th International Conference on Artificial Intelligence and Statistics, Geoffrey Gordon, David Dunson, and Miroslav Dud\u00edk (Eds.), Vol. 15. 315. http:\/\/proceedings.mlr.press\/v15\/glorot11a.html"},{"key":"e_1_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"e_1_3_2_20_2","unstructured":"Geoffrey Hinton Oriol Vinyals and Jeff Dean. 2015. Distilling the knowledge in a neural network. (2015). arXiv:1503.02531"},{"key":"e_1_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"e_1_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1561\/1000000060"},{"key":"e_1_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053889"},{"key":"e_1_3_2_25_2","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky Alex","year":"2009","unstructured":"Alex Krizhevsky. 2009. Learning multiple layers of features from tiny images. Tech Report (2009).","journal-title":"Tech Report"},{"key":"e_1_3_2_26_2","first-page":"8935","article-title":"Residual distillation: Towards portable deep neural networks without shortcuts","volume":"33","author":"Li Guilin","year":"2020","unstructured":"Guilin Li, Junlei Zhang, Yunhe Wang, Chuanjian Liu, Matthias Tan, Yunfeng Lin, Wei Zhang, Jiashi Feng, and Tong Zhang. 2020. Residual distillation: Towards portable deep neural networks without shortcuts. Advances in Neural Information Processing Systems 33 (2020), 8935. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/657b96f0592803e25a4f07166fff289a-Paper.pdf","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_27_2","article-title":"Visualizing the loss landscape of neural nets","volume":"31","author":"Li Hao","year":"2018","unstructured":"Hao Li, Zheng Xu, Gavin Taylor, Christoph Studer, and Tom Goldstein. 2018. Visualizing the loss landscape of neural nets. Advances in Neural Information Processing Systems 31 (2018).","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_28_2","article-title":"ResNet with one-neuron hidden layers is a universal approximator","volume":"31","author":"Lin Hongzhou","year":"2018","unstructured":"Hongzhou Lin and Stefanie Jegelka. 2018. ResNet with one-neuron hidden layers is a universal approximator. Advances in Neural Information Processing Systems 31 (2018).","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/ASPCON49795.2020.9276669"},{"key":"e_1_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2018.2815603"},{"key":"e_1_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2017.8050344"},{"key":"e_1_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"e_1_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01424-7_44"},{"key":"e_1_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/ACSSC.2017.8335699"},{"key":"e_1_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.5555\/3104322.3104425"},{"key":"e_1_3_2_36_2","article-title":"Reading digits in natural images with unsupervised feature learning","author":"Netzer Yuval","year":"2011","unstructured":"Yuval Netzer, Tao Wang, Adam Coates, Alessandro Bissacco, Bo Wu, and Andrew Y. Ng. 2011. Reading digits in natural images with unsupervised feature learning. NIPS Workshop on Deep Learning and Unsupervised Feature Learning (2011).","journal-title":"NIPS Workshop on Deep Learning and Unsupervised Feature Learning"},{"key":"e_1_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"},{"key":"e_1_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.3333552"},{"key":"e_1_3_2_40_2","first-page":"8024","volume-title":"Advances in Neural Information Processing Systems","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An imperative style, high-performance deep learning library. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.). Vol. 32. 8024. arXiv:1912.01703. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"e_1_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1090\/proc\/14789"},{"key":"e_1_3_2_42_2","unstructured":"Bharat Bhusan Sau and Vineeth N. Balasubramanian. 2016. Deep model compression: Distilling knowledge from noisy teachers. (2016). arxiv:1610.09650"},{"key":"e_1_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1186\/s13059-020-02255-1"},{"key":"e_1_3_2_44_2","volume-title":"3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings","author":"Simonyan Karen","year":"2015","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very deep convolutional networks for large-scale image recognition. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1409.1556"},{"key":"e_1_3_2_45_2","first-page":"1195","volume-title":"Advances in Neural Information Processing Systems","author":"Tarvainen Antti","year":"2017","unstructured":"Antti Tarvainen and Harri Valpola. 2017. Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. 1195. arXiv:1703.01780. https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/68053af2923e00204c3ca7c6a3150cf7-Paper.pdf"},{"key":"e_1_3_2_46_2","article-title":"Residual networks behave like ensembles of relatively shallow networks","volume":"29","author":"Veit Andreas","year":"2016","unstructured":"Andreas Veit, Michael J. Wilber, and Serge Belongie. 2016. Residual networks behave like ensembles of relatively shallow networks. Advances in Neural Information Processing Systems 29 (2016). arXiv:1605.06431. https:\/\/proceedings.neurips.cc\/paper\/2016\/file\/37bc2f75bf1bcfe8450a1a41c200364c-Paper.pdf","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00881"},{"key":"e_1_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-016-0043-6"},{"key":"e_1_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293902"},{"key":"e_1_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData50022.2020.9378171"},{"key":"e_1_3_2_51_2","unstructured":"Sergey Zagoruyko and Nikos Komodakis. 2017. DiracNets: Training very deep neural networks without skip-connections. (2017). arxiv:1706.00388"},{"key":"e_1_3_2_52_2","unstructured":"Sergey Zagoruyko and Nikos Komodakis. 2018. DiracNets: Training very deep neural networks without skip-connections. (2018). arxiv:1706.00388"},{"key":"e_1_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.3004555"}],"container-title":["ACM Transactions on Reconfigurable Technology and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624990","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624990","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:35:46Z","timestamp":1750178146000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624990"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,27]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,3,31]]}},"alternative-id":["10.1145\/3624990"],"URL":"https:\/\/doi.org\/10.1145\/3624990","relation":{},"ISSN":["1936-7406","1936-7414"],"issn-type":[{"value":"1936-7406","type":"print"},{"value":"1936-7414","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1,27]]},"assertion":[{"value":"2023-02-28","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2023-09-01","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-01-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}