{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T19:45:56Z","timestamp":1776455156794,"version":"3.51.2"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783031109850","type":"print"},{"value":"9783031109867","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-10986-7_36","type":"book-chapter","created":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T22:30:36Z","timestamp":1658183436000},"page":"442-456","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":36,"title":["A Survey of\u00a0Pretrained Language Models"],"prefix":"10.1007","author":[{"given":"Kaili","family":"Sun","sequence":"first","affiliation":[]},{"given":"Xudong","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Michael Y.","family":"Luo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,7,19]]},"reference":[{"key":"36_CR1","unstructured":"Alnawas, A., Arici, N.: Effect of word embedding variable parameters on Arabic sentiment analysis performance. arXiv preprint arXiv:2101.02906 (2021)"},{"key":"36_CR2","unstructured":"Bao, H., et al.: UniLMv2: pseudo-masked language models for unified language model pre-training. In: Proceedings of the 37th International Conference on Machine Learning, pp. 642\u2013652 (2020)"},{"key":"36_CR3","series-title":"IFIP Advances in Information and Communication Technology","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/978-3-030-49161-1_22","volume-title":"Artificial Intelligence Applications and Innovations","author":"G Barlas","year":"2020","unstructured":"Barlas, G., Stamatatos, E.: Cross-domain authorship attribution using pre-trained language models. In: Maglogiannis, I., Iliadis, L., Pimenidis, E. (eds.) AIAI 2020. IAICT, vol. 583, pp. 255\u2013266. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-49161-1_22"},{"key":"36_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2021\/6633213","volume":"2021","author":"N Boudjellal","year":"2021","unstructured":"Boudjellal, N., et al.: ABioNER: a BERT-based model for Arabic biomedical named-entity recognition. Complexity 2021, 1\u20136 (2021)","journal-title":"Complexity"},{"key":"36_CR5","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol. 33, pp. 1877\u20131901 (2020)"},{"issue":"5","key":"36_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3465055","volume":"12","author":"S Chaudhari","year":"2021","unstructured":"Chaudhari, S., Mithal, V., Polatkan, G., Ramanath, R.: An attentive survey of attention models. ACM Trans. Intell. Syst. Technol. 12(5), 1\u201332 (2021)","journal-title":"ACM Trans. Intell. Syst. 
Technol."},{"key":"36_CR7","unstructured":"Clark, K., Luong, M.T., Le, Q.V., Manning, C.D.: ELECTRA: pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555 (2020)"},{"key":"36_CR8","doi-asserted-by":"crossref","unstructured":"Cui, Y., Che, W., Liu, T., Qin, B., Wang, S., Hu, G.: Revisiting pre-trained models for Chinese natural language processing. In: Findings of the Association for Computational Linguistics, EMNLP 2020, pp. 657\u2013668 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.58"},{"key":"36_CR9","doi-asserted-by":"publisher","first-page":"3504","DOI":"10.1109\/TASLP.2021.3124365","volume":"29","author":"Y Cui","year":"2021","unstructured":"Cui, Y., Che, W., Liu, T., Qin, B., Yang, Z.: Pre-training with whole word masking for Chinese BERT. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 3504\u20133514 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"36_CR10","doi-asserted-by":"crossref","unstructured":"Dai, Z., Yang, Z., Yang, Y., Carbonell, J., Le, Q.V., Salakhutdinov, R.: Transformer-XL: attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860 (2019)","DOI":"10.18653\/v1\/P19-1285"},{"key":"36_CR11","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 4171\u20134186 (2019)"},{"issue":"1","key":"36_CR12","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1007\/s10489-021-02460-w","volume":"52","author":"P Do","year":"2021","unstructured":"Do, P., Phan, T.H.V.: Developing a BERT based triple classification model using knowledge graph embedding for question answering system. Appl. Intell. 52(1), 636\u2013651 (2021). https:\/\/doi.org\/10.1007\/s10489-021-02460-w","journal-title":"Appl. Intell."},{"key":"36_CR13","unstructured":"Dolan, B., Brockett, C.: Automatically constructing a corpus of sentential paraphrases. In: Proceedings of the 3rd International Workshop on Paraphrasing, pp. 9\u201316 (2005)"},{"key":"36_CR14","unstructured":"Dong, L., et al.: Unified language model pre-training for natural language understanding and generation. In: Proceedings of the 33rd International Conference on Neural Information Processing Systems, pp. 13063\u201313075 (2019)"},{"key":"36_CR15","doi-asserted-by":"crossref","unstructured":"El Boukkouri, H., Ferret, O., Lavergne, T., Noji, H., Zweigenbaum, P., Tsujii, J.: CharacterBERT: reconciling ELMo and BERT for word-level open-vocabulary representations from characters. In: Proceedings of the 18th International Conference on Computational Linguistics, pp. 6903\u20136915 (2020)","DOI":"10.18653\/v1\/2020.coling-main.609"},{"key":"36_CR16","unstructured":"Erhan, D., Courville, A., Bengio, Y., Vincent, P.: Why does unsupervised pre-training help deep learning? In: Proceedings of the 13th International Conference on Artificial Intelligence and Statistics, pp. 201\u2013208 (2010)"},{"key":"36_CR17","unstructured":"Gong, Y., Liu, L., Yang, M., Bourdev, L.: Compressing deep convolutional networks using vector quantization. arXiv preprint arXiv:1412.6115 (2014)"},{"key":"36_CR18","unstructured":"Han, S., Pool, J., Tran, J., Dally, W.: Learning both weights and connections for efficient neural network. 
In: Advances in Neural Information Processing Systems 28 (2015)"},{"issue":"8","key":"36_CR19","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"36_CR20","doi-asserted-by":"crossref","unstructured":"Jiao, X., et al.: TinyBERT: distilling BERT for natural language understanding. arXiv preprint arXiv:1909.10351 (2019)","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"36_CR21","unstructured":"Lan, Z., Chen, M., Goodman, S., Gimpel, K., Sharma, P., Soricut, R.: ALBERT: a lite BERT for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942 (2019)"},{"key":"36_CR22","doi-asserted-by":"crossref","unstructured":"Li, J., Tang, T., Zhao, W., Wen, J.: Pretrained language models for text generation: a survey. In: Proceedings of the 30th International Joint Conference on Artificial Intelligence, pp. 4492\u20134497 (2021)","DOI":"10.24963\/ijcai.2021\/612"},{"key":"36_CR23","unstructured":"Li, L.H., Yatskar, M., Yin, D., Hsieh, C.J., Chang, K.W.: VisualBERT: a simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)"},{"key":"36_CR24","doi-asserted-by":"publisher","first-page":"8762","DOI":"10.1109\/ACCESS.2021.3049294","volume":"9","author":"Y Lin","year":"2021","unstructured":"Lin, Y., Wang, C., Song, H., Li, Y.: Multi-head self-attention transformation networks for aspect-based sentiment analysis. IEEE Access 9, 8762\u20138770 (2021)","journal-title":"IEEE Access"},{"key":"36_CR25","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"376","DOI":"10.1007\/978-3-030-82147-0_31","volume-title":"Knowledge Science, Engineering and Management","author":"J Liu","year":"2021","unstructured":"Liu, J., Wu, J., Luo, X.: Chinese judicial summarising based on short sentence extraction and GPT-2. In: Qiu, H., Zhang, C., Fei, Z., Qiu, M., Kung, S.-Y. (eds.) KSEM 2021. LNCS (LNAI), vol. 12816, pp. 376\u2013393. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-82147-0_31"},{"key":"36_CR26","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"issue":"2","key":"36_CR27","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1080\/0952813X.2020.1744198","volume":"33","author":"Z Meng","year":"2021","unstructured":"Meng, Z., Tian, S., Yu, L., Lv, Y.: Joint extraction of entities and relations based on character graph convolutional network and multi-head self-attention mechanism. J. Exp. Theoret. Artif. Intell. 33(2), 349\u2013362 (2021)","journal-title":"J. Exp. Theoret. Artif. Intell."},{"key":"36_CR28","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)"},{"key":"36_CR29","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. arXiv preprint arXiv:2203.02155 (2022)"},{"key":"36_CR30","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: GloVe: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing, pp. 
1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"36_CR31","doi-asserted-by":"crossref","unstructured":"Peters, M., et al.: Deep contextualized word representations. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 2227\u20132237 (2018)","DOI":"10.18653\/v1\/N18-1202"},{"issue":"10","key":"36_CR32","doi-asserted-by":"publisher","first-page":"1872","DOI":"10.1007\/s11431-020-1647-3","volume":"63","author":"XP Qiu","year":"2020","unstructured":"Qiu, X.P., Sun, T.X., Xu, Y.G., Shao, Y.F., Dai, N., Huang, X.J.: Pre-trained models for natural language processing: a survey. Sci. Chin. Technol. Sci. 63(10), 1872\u20131897 (2020). https:\/\/doi.org\/10.1007\/s11431-020-1647-3","journal-title":"Sci. Chin. Technol. Sci."},{"key":"36_CR33","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative pre-training (2018). https:\/\/www.cs.ubc.ca\/~amuham01\/LING530\/papers\/radford2018improving.pdf"},{"issue":"8","key":"36_CR34","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"36_CR35","unstructured":"Romero, A., Ballas, N., Kahou, S.E., Chassang, A., Gatta, C., Bengio, Y.: FitNets: hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)"},{"key":"36_CR36","doi-asserted-by":"publisher","first-page":"132306","DOI":"10.1016\/j.physd.2019.132306","volume":"404","author":"A Sherstinsky","year":"2020","unstructured":"Sherstinsky, A.: Fundamentals of recurrent neural network (RNN) and long short-term memory (LSTM) network. Physica D 404, 132306 (2020)","journal-title":"Physica D"},{"key":"36_CR37","unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1631\u20131642 (2013)"},{"key":"36_CR38","unstructured":"Sun, Y., et al.: ERNIE 3.0: large-scale knowledge enhanced pre-training for language understanding and generation. arXiv preprint arXiv:2107.02137 (2021)"},{"key":"36_CR39","doi-asserted-by":"crossref","unstructured":"Sun, Y., et al.: ERNIE 2.0: a continual pre-training framework for language understanding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 8968\u20138975 (2020)","DOI":"10.1609\/aaai.v34i05.6428"},{"key":"36_CR40","doi-asserted-by":"crossref","unstructured":"Torrey, L., Shavlik, J.: Transfer learning. In: Handbook of Research on Machine Learning Applications and Trends: Algorithms, Methods, and Techniques, pp. 242\u2013264. IGI Global (2010)","DOI":"10.4018\/978-1-60566-766-9.ch011"},{"key":"36_CR41","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 6000\u20136010 (2017)"},{"key":"36_CR42","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: GLUE: a multi-task benchmark and analysis platform for natural language understanding. 
arXiv preprint arXiv:1804.07461 (2018)","DOI":"10.18653\/v1\/W18-5446"},{"key":"36_CR43","doi-asserted-by":"publisher","first-page":"138162","DOI":"10.1109\/ACCESS.2020.3012595","volume":"8","author":"T Wang","year":"2020","unstructured":"Wang, T., Lu, K., Chow, K.P., Zhu, Q.: COVID-19 sensing: negative sentiment analysis on social media in China via BERT model. IEEE Access 8, 138162\u2013138169 (2020)","journal-title":"IEEE Access"},{"key":"36_CR44","unstructured":"Wang, W., et al.: StructBERT: incorporating language structures into pre-training for deep language understanding. arXiv preprint arXiv:1908.04577 (2019)"},{"key":"36_CR45","unstructured":"Xu, H., et al.: Pre-trained models: past, present and future. arXiv preprint arXiv:2106.07139 (2021)"},{"key":"36_CR46","doi-asserted-by":"crossref","unstructured":"Xu, L., et al.: CLUE: a Chinese language understanding evaluation benchmark. In: Proceedings of the 28th International Conference on Computational Linguistics, pp. 4762\u20134772 (2020)","DOI":"10.18653\/v1\/2020.coling-main.419"},{"issue":"2","key":"36_CR47","first-page":"022033","volume":"1748","author":"M Yang","year":"2021","unstructured":"Yang, M., Xu, J., Luo, K., Zhang, Y.: Sentiment analysis of Chinese text based on Elmo-RNN model. J. Phys: Conf. Ser. 1748(2), 022033 (2021)","journal-title":"J. Phys: Conf. Ser."},{"key":"36_CR48","first-page":"5753","volume":"32","author":"Z Yang","year":"2019","unstructured":"Yang, Z., Dai, Z., Yang, Y., Carbonell, J., Salakhutdinov, R.R., Le, Q.V.: XLNet: generalized autoregressive pretraining for language understanding. Adv. Neural. Inf. Process. Syst. 32, 5753\u20135763 (2019)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"8","key":"36_CR49","doi-asserted-by":"publisher","first-page":"5831","DOI":"10.1007\/s00500-019-04367-8","volume":"24","author":"X Yu","year":"2019","unstructured":"Yu, X., Feng, W., Wang, H., Chu, Q., Chen, Q.: An attention mechanism and multi-granularity-based Bi-LSTM model for Chinese Q &A system. Soft. Comput. 24(8), 5831\u20135845 (2019). https:\/\/doi.org\/10.1007\/s00500-019-04367-8","journal-title":"Soft. Comput."},{"key":"36_CR50","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1016\/j.neucom.2021.07.002","volume":"460","author":"Z Zhang","year":"2021","unstructured":"Zhang, Z., Wu, S., Jiang, D., Chen, G.: BERT-JAM: maximizing the utilization of BERT for neural machine translation. Neurocomputing 460, 84\u201394 (2021)","journal-title":"Neurocomputing"},{"key":"36_CR51","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Han, X., Liu, Z., Jiang, X., Sun, M., Liu, Q.: ERNIE: enhanced language representation with informative entities. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 
1441\u20131451 (2019)","DOI":"10.18653\/v1\/P19-1139"}],"container-title":["Lecture Notes in Computer Science","Knowledge Science, Engineering and Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-10986-7_36","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T22:36:53Z","timestamp":1658183813000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-10986-7_36"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031109850","9783031109867"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-10986-7_36","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"19 July 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KSEM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Knowledge Science, Engineering and Management","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 August 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 August 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ksem2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ksem22.smart-conf.net\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"498","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"169","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers 
Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"34% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}