{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T18:08:22Z","timestamp":1775326102738,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3539132","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:12Z","timestamp":1660331172000},"page":"3504-3512","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["BE3R: BERT based Early-Exit Using Expert Routing"],"prefix":"10.1145","author":[{"given":"Sourab","family":"Mangrulkar","sequence":"first","affiliation":[{"name":"Amazon, Bengaluru, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ankith","family":"M S","sequence":"additional","affiliation":[{"name":"Amazon, Bengaluru, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vivek","family":"Sembium","sequence":"additional","affiliation":[{"name":"Amazon, Bengaluru, India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"https:\/\/quoradata.quora.com\/First-Quora-Dataset-Release-Question-Pairs","author":"Quora","year":"2018","unstructured":"Quora question pairs. https:\/\/quoradata.quora.com\/First-Quora-Dataset-Release-Question-Pairs, 2018."},{"key":"e_1_3_2_2_2_1","volume-title":"https:\/\/developer.nvidia.com\/tensorrt","author":"Tensorrt","year":"2018","unstructured":"Tensorrt. https:\/\/developer.nvidia.com\/tensorrt., 2018."},{"key":"e_1_3_2_2_3_1","volume-title":"Onnx: Open neural network exchange. https:\/\/github.com\/onnx\/onnx","author":"Bai J.","year":"2019","unstructured":"Bai, J., Lu, F., Zhang, K., et al. Onnx: Open neural network exchange. https:\/\/github.com\/onnx\/onnx, 2019."},{"key":"e_1_3_2_2_4_1","volume-title":"Pondernet: Learning to ponder. ArXiv abs\/2107.05407","author":"Banino A.","year":"2021","unstructured":"Banino, A., Balaguer, J., and Blundell, C. Pondernet: Learning to ponder. ArXiv abs\/2107.05407 (2021)."},{"key":"e_1_3_2_2_5_1","volume-title":"Generating long sequences with sparse transformers","author":"Child R.","year":"2019","unstructured":"Child, R., Gray, S., Radford, A., and Sutskever, I. Generating long sequences with sparse transformers, 2019."},{"key":"e_1_3_2_2_6_1","volume-title":"Rethinking attention with performers","author":"Choromanski K.","year":"2021","unstructured":"Choromanski, K., Likhosherstov, V., Dohan, D., Song, X., Gane, A., Sarlos, T., Hawkins, P., Davis, J., Mohiuddin, A., Kaiser, L., Belanger, D., Colwell, L., and Weller, A. Rethinking attention with performers, 2021."},{"key":"e_1_3_2_2_7_1","volume-title":"Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)","author":"Conneau A.","year":"2018","unstructured":"Conneau, A., and Kiela, D. SentEval: An evaluation toolkit for universal sentence representations. In Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018) (Miyazaki, Japan, May 2018), European Language Resources Association (ELRA)."},{"key":"e_1_3_2_2_8_1","volume-title":"Funnel-transformer: Filtering out sequential redundancy for efficient language processing","author":"Dai Z.","year":"2020","unstructured":"Dai, Z., Lai, G., Yang, Y., and Le, Q. V. Funnel-transformer: Filtering out sequential redundancy for efficient language processing, 2020."},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin J.","year":"2019","unstructured":"Devlin, J., Chang, M.-W., Lee, K., and Toutanova, K. BERT: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers) (Minneapolis, Minnesota, June 2019), Association for Computational Linguistics, pp. 4171--4186."},{"key":"e_1_3_2_2_10_1","volume-title":"Proceedings of the Third International Workshop on Paraphrasing (IWP2005)","author":"Dolan W. B.","year":"2005","unstructured":"Dolan, W. B., and Brockett, C. Automatically constructing a corpus of sentential paraphrases. In Proceedings of the Third International Workshop on Paraphrasing (IWP2005) (2005)."},{"key":"e_1_3_2_2_11_1","volume-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","author":"Fedus W.","year":"2021","unstructured":"Fedus, W., Zoph, B., and Shazeer, N. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity, 2021."},{"key":"e_1_3_2_2_12_1","volume-title":"Power-bert: Accelerating bert inference via progressive wordvector elimination","author":"Goyal S.","year":"2020","unstructured":"Goyal, S., Choudhury, A. R., Raje, S. M., Chakaravarthy, V. T., Sabharwal, Y., and Verma, A. Power-bert: Accelerating bert inference via progressive wordvector elimination, 2020."},{"key":"e_1_3_2_2_13_1","volume-title":"Distilling the knowledge in a neural network","author":"Hinton G.","year":"2015","unstructured":"Hinton, G., Vinyals, O., and Dean, J. Distilling the knowledge in a neural network, 2015."},{"key":"e_1_3_2_2_14_1","volume-title":"Tinybert: Distilling bert for natural language understanding","author":"Jiao X.","year":"2020","unstructured":"Jiao, X., Yin, Y., Shang, L., Jiang, X., Chen, X., Li, L., Wang, F., and Liu, Q. Tinybert: Distilling bert for natural language understanding, 2020."},{"key":"e_1_3_2_2_15_1","volume-title":"ColBERT: Efficient and Effective Passage Search via Contextualized Late Interaction over BERT","author":"Khattab O.","year":"2020","unstructured":"Khattab, O., and Zaharia, M. ColBERT: Efficient and Effective Passage Search via Contextualized Late Interaction over BERT. Association for Computing Machinery, New York, NY, USA, 2020, p. 39--48."},{"key":"e_1_3_2_2_16_1","volume-title":"3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings","author":"Kingma D. P.","year":"2015","unstructured":"Kingma, D. P., and Ba, J. Adam: A method for stochastic optimization. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings (2015), Y. Bengio and Y. LeCun, Eds."},{"key":"e_1_3_2_2_17_1","volume-title":"Reformer: The efficient transformer","author":"Kitaev N.","year":"2020","unstructured":"Kitaev, N., Lukasz Kaiser, and Levskaya, A. Reformer: The efficient transformer, 2020."},{"key":"e_1_3_2_2_18_1","first-page":"552","volume-title":"Proceedings of the Thirteenth International Conference on Principles of Knowledge Representation and Reasoning","author":"Levesqe H. J.","year":"2012","unstructured":"Levesqe, H. J., Davis, E., and Morgenstern, L. The winograd schema challenge. In Proceedings of the Thirteenth International Conference on Principles of Knowledge Representation and Reasoning (2012), KR'12, AAAI Press, p. 552--561."},{"key":"e_1_3_2_2_19_1","volume-title":"Fastbert: a selfdistilling bert with adaptive inference time","author":"Liu W.","year":"2020","unstructured":"Liu, W., Zhou, P., Zhao, Z., Wang, Z., Deng, H., and Ju, Q. Fastbert: a selfdistilling bert with adaptive inference time, 2020."},{"key":"e_1_3_2_2_20_1","volume-title":"Passage re-ranking with bert. arXiv preprint arXiv:1901.04085","author":"Nogueira R.","year":"2019","unstructured":"Nogueira, R., and Cho, K. Passage re-ranking with bert. arXiv preprint arXiv:1901.04085 (2019)."},{"key":"e_1_3_2_2_21_1","first-page":"8024","volume-title":"Advances in Neural Information Processing Systems 32, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc","author":"Paszke A.","year":"2019","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., Desmaison, A., Kopf, A., Yang, E., DeVito, Z., Raison, M., Tejani, A., Chilamkurthy, S., Steiner, B., Fang, L., Bai, J., and Chintala, S. Pytorch: An imperative style, high-performance deep learning library. In Advances in Neural Information Processing Systems 32, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett, Eds. Curran Associates, Inc., 2019, pp. 8024--8035."},{"key":"e_1_3_2_2_22_1","volume-title":"S. W., Wang, S., and Tang, J. Blockwise self-attention for long document understanding","author":"Qiu J.","year":"2020","unstructured":"Qiu, J., Ma, H., Levy, O., tau Yih, S. W., Wang, S., and Tang, J. Blockwise self-attention for long document understanding, 2020."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1264"},{"key":"e_1_3_2_2_24_1","volume-title":"Sentence-bert: Sentence embeddings using siamese bert-networks","author":"Reimers N.","year":"2019","unstructured":"Reimers, N., and Gurevych, I. Sentence-bert: Sentence embeddings using siamese bert-networks, 2019."},{"key":"e_1_3_2_2_25_1","volume-title":"Distilbert, a distilled version of bert: smaller, faster, cheaper and lighter","author":"Sanh V.","year":"2020","unstructured":"Sanh, V., Debut, L., Chaumond, J., and Wolf, T. Distilbert, a distilled version of bert: smaller, faster, cheaper and lighter, 2020."},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing","author":"Socher R.","year":"2013","unstructured":"Socher, R., Perelygin, A., Wu, J., Chuang, J., Manning, C. D., Ng, A., and Potts, C. Recursive deep models for semantic compositionality over a sentiment treebank. In Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing (Seattle, Washington, USA, Oct. 2013), Association for Computational Linguistics, pp. 1631--1642."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"e_1_3_2_2_29_1","volume-title":"Linformer: Self-attention with linear complexity","author":"Wang S.","year":"2020","unstructured":"Wang, S., Li, B. Z., Khabsa, M., Fang, H., and Ma, H. Linformer: Self-attention with linear complexity, 2020."},{"key":"e_1_3_2_2_30_1","volume-title":"Minilm: Deep selfattention distillation for task-agnostic compression of pre-trained transformers","author":"Wang W.","year":"2020","unstructured":"Wang,W., Wei, F., Dong, L., Bao, H., Yang, N., and Zhou, M. Minilm: Deep selfattention distillation for task-agnostic compression of pre-trained transformers, 2020."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00290"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1101"},{"key":"e_1_3_2_2_33_1","volume-title":"Huggingface's transformers: State-of-the-art natural language processing. CoRR abs\/1910.03771","author":"Wolf T.","year":"2019","unstructured":"Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Rault, T., Louf, R., Funtowicz, M., and Brew, J. Huggingface's transformers: State-of-the-art natural language processing. CoRR abs\/1910.03771 (2019)."},{"key":"e_1_3_2_2_34_1","volume-title":"Deebert: Dynamic early exiting for accelerating bert inference","author":"Xin J.","year":"2020","unstructured":"Xin, J., Tang, R., Lee, J., Yu, Y., and Lin, J. Deebert: Dynamic early exiting for accelerating bert inference, 2020."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.633"},{"key":"e_1_3_2_2_36_1","volume-title":"Big bird: Transformers for longer sequences","author":"Zaheer M.","year":"2021","unstructured":"Zaheer, M., Guruganesh, G., Dubey, A., Ainslie, J., Alberti, C., Ontanon, S., Pham, P., Ravula, A.,Wang, Q., Yang, L., and Ahmed, A. Big bird: Transformers for longer sequences, 2021."},{"key":"e_1_3_2_2_37_1","volume-title":"Bert loses patience: Fast and robust inference with early exit","author":"Zhou W.","year":"2020","unstructured":"Zhou, W., Xu, C., Ge, T., McAuley, J., Xu, K., and Wei, F. Bert loses patience: Fast and robust inference with early exit, 2020."}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Washington DC USA","acronym":"KDD '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539132","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3539132","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:58Z","timestamp":1750186978000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539132"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":37,"alternative-id":["10.1145\/3534678.3539132","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3539132","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}