{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:49:58Z","timestamp":1767340198843,"version":"3.28.0"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9534402","type":"proceedings-article","created":{"date-parts":[[2021,9,23]],"date-time":"2021-09-23T22:32:08Z","timestamp":1632436328000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["RefBERT: Compressing BERT by Referencing to Pre-computed Representations"],"prefix":"10.1109","author":[{"given":"Xinyi","family":"Wang","sequence":"first","affiliation":[]},{"given":"Haiqin","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Liang","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Mo","sequence":"additional","affiliation":[]},{"given":"Jianping","family":"Shen","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450026"},{"key":"ref38","first-page":"5754","article-title":"Xlnet: Generalized autoregressive pretraining for language understanding","author":"yang","year":"2019","journal-title":"NeurIPS"},{"key":"ref33","first-page":"1147","article-title":"Neural topic model with attention for supervised learning","volume":"108","author":"wang","year":"2020","journal-title":"AISTATS"},{"key":"ref32","article-title":"Infobert: Improving robustness of language models from an information theoretic perspective","author":"wang","year":"2020","journal-title":"CoRR"},{"key":"ref31","article-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding","author":"wang","year":"2019","journal-title":"ICLRE"},{"key":"ref30","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NIPS"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371792"},{"key":"ref36","article-title":"Emotion dynamics modeling via BERT","author":"yang","year":"2021","journal-title":"IJCNN"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1101"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00290"},{"journal-title":"Advances and challenges in conversational recommender systems A survey","year":"2021","author":"gao","key":"ref10"},{"journal-title":"Retrieving and reading A comprehensive survey on open-domain question answering","year":"2021","author":"zhu","key":"ref40"},{"key":"ref11","article-title":"Deep compression: Compressing deep neural network with pruning, trained quantization and huffman coding","author":"han","year":"2016","journal-title":"ICLRE"},{"key":"ref12","article-title":"Distilling the knowledge in a neural network","author":"hinton","year":"2015","journal-title":"CoRR"},{"key":"ref13","first-page":"7344","article-title":"Normalization helps training of quantized LSTM","author":"hou","year":"2019","journal-title":"NeurIPS"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2017.06.012"},{"key":"ref15","first-page":"397","article-title":"Higru: Hierarchical gated recurrent units for utterance-level emotion recognition","author":"jiao","year":"2019","journal-title":"NAACL HLT 2019"},{"key":"ref16","article-title":"Tinybert: Distilling BERT for natural language understanding","author":"jiao","year":"2019","journal-title":"CoRR"},{"key":"ref17","article-title":"ALBERT: A lite BERT for self-supervised learning of language representations","author":"lan","year":"2019","journal-title":"CoRR"},{"key":"ref18","article-title":"Have we solved the hard problem? it's not easy! contextual lexical contrast as a means to probe neural coherence","author":"lei","year":"2021","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/562"},{"key":"ref28","article-title":"Distilling task-specific knowledge from BERT into simple neural networks","author":"tang","year":"2019","journal-title":"CoRR"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1595"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1441"},{"key":"ref3","article-title":"Transformer to CNN: label-scarce distillation for efficient text classification","author":"chia","year":"2019","journal-title":"CoRR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3416063"},{"key":"ref29","article-title":"Well-read students learn better: The impact of student initialization on knowledge distillation","author":"turc","year":"2019","journal-title":"CoRR"},{"key":"ref5","article-title":"ELECTRA: pre-training text encoders as discriminators rather than generators","author":"clark","year":"2020","journal-title":"ICLRE"},{"key":"ref8","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"NAACL-HLT"},{"journal-title":"Elements of Information Theory (Wiley Series in Telecommunications and Signal Processing)","year":"2006","author":"cover","key":"ref7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/S17-2001"},{"key":"ref9","article-title":"Automatically constructing a corpus of sentential paraphrases","author":"dolan","year":"2005","journal-title":"Proceedings of the Third International Workshop on Paraphrasing IWP IJCNLP"},{"key":"ref1","article-title":"The fifth PASCAL recognizing textual entailment challenge","author":"bentivogli","year":"2009","journal-title":"TAC"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414032"},{"key":"ref22","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013","journal-title":"NIPS"},{"key":"ref21","article-title":"Roberta: A robustly optimized BERT pretraining approach","author":"liu","year":"2019","journal-title":"CoRR"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"journal-title":"Opening the black box of deep neural networks via information","year":"2017","author":"shwartz-ziv","key":"ref26"},{"key":"ref25","article-title":"Distilbert, a distilled version of BERT: smaller, faster, cheaper and lighter","author":"sanh","year":"2019","journal-title":"CoRR"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09534402.pdf?arnumber=9534402","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:45:56Z","timestamp":1652197556000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9534402\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9534402","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}