{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T20:58:39Z","timestamp":1775509119425,"version":"3.50.1"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"University Grants Commission"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Sustain. Comput."],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1109\/tsusc.2026.3666456","type":"journal-article","created":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T21:00:10Z","timestamp":1771534810000},"page":"98-110","source":"Crossref","is-referenced-by-count":0,"title":["BEExformer: A Fast Inferencing Binarized Transformer With Early Exits"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9191-1771","authenticated-orcid":false,"given":"Wazib","family":"Ansar","sequence":"first","affiliation":[{"name":"A. K. Choudhury School of IT, University of Calcutta, Kolkata, West Bengal, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Saptarsi","family":"Goswami","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Bangabasi Morning College, Kolkata, West Bengal, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4380-3172","authenticated-orcid":false,"given":"Amlan","family":"Chakrabarti","sequence":"additional","affiliation":[{"name":"A. K. Choudhury School of IT, University of Calcutta, Kolkata, West Bengal, India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3495162"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TSUSC.2022.3210564"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TSUSC.2025.3578834"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TSUSC.2025.3544430"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.829"},{"key":"ref6","article-title":"DistilBERT, a distilled version of BERT: Smaller, faster, cheaper and lighter","author":"Sanh","year":"2019"},{"key":"ref7","article-title":"Linformer: Self-attention with linear complexity","author":"Wang","year":"2020"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539132"},{"key":"ref9","first-page":"3301","article-title":"Shallow-deep networks: Understanding and mitigating network overthinking","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kaya"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.sustainlp-1.11"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6409"},{"key":"ref12","first-page":"27168","article-title":"ZeroQuant: Efficient and affordable post-training quantization for large-scale transformers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yao"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01227-8"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA58977.2023.00104"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3623402"},{"key":"ref16","first-page":"483","article-title":"Efficient post-training quantization with FP8 formats","volume-title":"Proc. Mach. Learn. Syst.","volume":"6","author":"Shen","year":"2024"},{"key":"ref17","first-page":"17737","article-title":"Post-training sparsity-aware quantization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Shomron"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.498"},{"key":"ref19","first-page":"28351","article-title":"BiBench: Benchmarking and analyzing network binarization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Qin","year":"2023"},{"key":"ref20","first-page":"4114","article-title":"Binarized neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hubara","year":"2016"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.334"},{"key":"ref22","article-title":"BiBERT: Accurate fully binarized BERT","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Qin"},{"key":"ref23","first-page":"14303","article-title":"BiT: Robustly binarized multi-distilled transformer","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref24","first-page":"6906","article-title":"Does knowledge distillation really work?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Stanton"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-024-05444-8"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3117837"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TSUSC.2025.3528105"},{"key":"ref28","first-page":"18330","article-title":"BERT loses patience: Fast and robust inference with early exit","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhou","year":"2020"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.204"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.240"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-022-03865-x"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.37"},{"key":"ref33","article-title":"Reducing transformer depth on demand with structured dropout","author":"Fan","year":"2019"},{"key":"ref34","first-page":"14037","article-title":"Are sixteen heads really better than one?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Michel","year":"2019"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref38","article-title":"ALBERT: A lite BERT for self-supervised learning of language representations","author":"Lan","year":"2019"},{"key":"ref39","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019"}],"container-title":["IEEE Transactions on Sustainable Computing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7274860\/11474690\/11399904.pdf?arnumber=11399904","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T19:58:36Z","timestamp":1775505516000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11399904\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":39,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tsusc.2026.3666456","relation":{},"ISSN":["2377-3782","2377-3790"],"issn-type":[{"value":"2377-3782","type":"electronic"},{"value":"2377-3790","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3]]}}}