{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T22:07:59Z","timestamp":1767650879210,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":45,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819794300"},{"type":"electronic","value":"9789819794317"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-9431-7_24","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T14:03:04Z","timestamp":1730383384000},"page":"311-323","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Outperforming Larger Models on\u00a0Text Classification Through Continued Pre-training"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5050-2779","authenticated-orcid":false,"given":"Yu","family":"Zheng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0611-4515","authenticated-orcid":false,"given":"Ming","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9204-2376","authenticated-orcid":false,"given":"Zou","family":"Ao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1526-7889","authenticated-orcid":false,"given":"Wenning","family":"Hao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8159-0921","authenticated-orcid":false,"given":"Hui","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8293-4709","authenticated-orcid":false,"given":"Yi","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"24_CR1","unstructured":"Achiam, J., et\u00a0al.: Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"24_CR2","unstructured":"Brown, T.B., et al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol.\u00a033, pp. 1877\u20131901 (2020)"},{"key":"24_CR3","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"24_CR4","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Liang, T., et al.: Encouraging divergent thinking in large language models through multi-agent debate. arXiv preprint arXiv:2305.19118 (2023)","DOI":"10.18653\/v1\/2024.emnlp-main.992"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"Park, J.S., O\u2019Brien, J., Cai, C.J., Morris, M.R., Liang, P., Bernstein, M.S.: Generative agents: interactive simulacra of human behavior. In: Proceedings of the 36th Annual ACM Symposium on User Interface Software and Technology, pp. 1\u201322 (2023)","DOI":"10.1145\/3586183.3606763"},{"key":"24_CR7","unstructured":"Li, G., Hammoud, H., Itani, H., Khizbullin, D., Ghanem, B.: Camel: communicative agents for \u201cmind\u201d exploration of large language model society. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"24_CR8","unstructured":"Rana, K., Haviland, J., Garg, S., Abou-Chakra, J., Reid, I., Suenderhauf, N.: SayPlan: grounding large language models using 3d scene graphs for scalable task planning. arXiv preprint arXiv:2307.06135 (2023)"},{"key":"24_CR9","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis, Minnesota, June 2019. https:\/\/doi.org\/10.18653\/v1\/N19-1423, https:\/\/aclanthology.org\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"24_CR10","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"24_CR11","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative pre-training (2018)"},{"issue":"8","key":"24_CR12","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"24_CR13","unstructured":"Touvron, H., et\u00a0al.: Llama 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"24_CR14","unstructured":"Yao, X., Zheng, Y., Yang, X., Yang, Z.: NLP from scratch without large-scale pretraining: a simple and efficient framework. In: International Conference on Machine Learning, pp. 25438\u201325451. PMLR (2022)"},{"key":"24_CR15","first-page":"34201","volume":"36","author":"SM Xie","year":"2023","unstructured":"Xie, S.M., Santurkar, S., Ma, T., Liang, P.S.: Data selection for language models via importance resampling. Adv. Neural. Inf. Process. Syst. 36, 34201\u201334227 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"24_CR16","unstructured":"Kaddour, J.: The minipile challenge for data-efficient language models. arXiv preprint arXiv:2304.08442 (2023)"},{"key":"24_CR17","unstructured":"Gao, L., et\u00a0al.: The pile: an 800gb dataset of diverse text for language modeling. arXiv preprint arXiv:2101.00027 (2020)"},{"key":"24_CR18","unstructured":"Fei, Z., et al.: Query of cc: unearthing large scale domain-specific knowledge from public corpora. arXiv preprint arXiv:2401.14624 (2024)"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Zhu, Y., et al.: Aligning books and movies: towards story-like visual explanations by watching movies and reading books. In: 2015 IEEE International Conference on Computer Vision (ICCV), pp. 19\u201327 (2015)","DOI":"10.1109\/ICCV.2015.11"},{"key":"24_CR20","unstructured":"Gokaslan, A., Cohen, V.: Openwebtext corpus (2019)"},{"key":"24_CR21","unstructured":"Trinh, T.H., Le, Q.V.: A simple method for commonsense reasoning. ArXiv abs\/1806.02847 (2018)"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Grusky, M., Naaman, M., Artzi, Y.: Newsroom: A dataset of 1.3 million summaries with diverse extractive strategies. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (2018)","DOI":"10.18653\/v1\/N18-1065"},{"key":"24_CR23","doi-asserted-by":"publisher","unstructured":"Conneau, A., et al.: Unsupervised cross-lingual representation learning at scale. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 8440\u20138451. Association for Computational Linguistics, July 2020. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.747, https:\/\/aclanthology.org\/2020.acl-main.747","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"24_CR24","unstructured":"Levesque, H., Davis, E., Morgenstern, L.: The winograd schema challenge. In: Thirteenth international conference on the principles of knowledge representation and reasoning (2012)"},{"key":"24_CR25","doi-asserted-by":"crossref","unstructured":"McAuley, J., Leskovec, J.: Hidden factors and hidden topics: understanding rating dimensions with review text. In: Proceedings of the 7th ACM Conference on Recommender Systems (2013)","DOI":"10.1145\/2507157.2507163"},{"key":"24_CR26","doi-asserted-by":"publisher","unstructured":"Gururangan, S., et al.: Don\u2019t stop pretraining: adapt language models to domains and tasks. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, 5\u201310 July 2020, pp. 8342\u20138360. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.740, https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.740","DOI":"10.18653\/v1\/2020.acl-main.740"},{"issue":"2","key":"24_CR27","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1016\/S0016-0032(96)00063-4","volume":"334","author":"M Men\u00e9ndez","year":"1997","unstructured":"Men\u00e9ndez, M., Pardo, J., Pardo, L., Pardo, M.: The jensen-shannon divergence. J. Franklin Inst. 334(2), 307\u2013318 (1997)","journal-title":"J. Franklin Inst."},{"key":"24_CR28","unstructured":"Yao, X., Zheng, Y., Yang, X., Yang, Z.: NLP from scratch without large-scale pretraining: a simple and efficient framework. ArXiv abs\/2111.04130 (2021)"},{"key":"24_CR29","unstructured":"Zeng, A., et\u00a0al.: GLM-130b: an open bilingual pre-trained model. In: The Eleventh International Conference on Learning Representations (2022)"},{"key":"24_CR30","doi-asserted-by":"crossref","unstructured":"Chen, Y., et al.: Adaprompt: adaptive model training for prompt-based NLP. ArXiv abs\/2202.04824 (2022)","DOI":"10.18653\/v1\/2022.findings-emnlp.448"},{"key":"24_CR31","first-page":"23908","volume":"35","author":"X Chen","year":"2022","unstructured":"Chen, X., et al.: Decoupling knowledge from memorization: retrieval-augmented prompt learning. Adv. Neural. Inf. Process. Syst. 35, 23908\u201323922 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"24_CR32","doi-asserted-by":"crossref","unstructured":"Schick, T., Sch\u00fctze, H.: Exploiting cloze-questions for few-shot text classification and natural language inference. In: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pp. 255\u2013269 (2021)","DOI":"10.18653\/v1\/2021.eacl-main.20"},{"key":"24_CR33","unstructured":"Sun, Y., Zheng, Y., Hao, C., Qiu, H.: NSP-BERT: a prompt-based few-shot learner through an original pre-training task \u2014\u2014 next sentence prediction. In: Proceedings of the 29th International Conference on Computational Linguistics, pp. 3233\u20133250. International Committee on Computational Linguistics, Gyeongju, Republic of Korea, October 2022. https:\/\/aclanthology.org\/2022.coling-1.286"},{"key":"24_CR34","unstructured":"Bai, J., et\u00a0al.: Qwen technical report. arXiv preprint arXiv:2309.16609 (2023)"},{"key":"24_CR35","doi-asserted-by":"crossref","unstructured":"Gao, T., Fisch, A., Chen, D.: Making pre-trained language models better few-shot learners. In: ACL 2021: 59th annual meeting of the Association for Computational Linguistics, pp. 3816\u20133830 (2021)","DOI":"10.18653\/v1\/2021.acl-long.295"},{"key":"24_CR36","unstructured":"Wang, S., Fang, H., Khabsa, M., Mao, H., Ma, H.: Entailment as few-shot learner. ArXiv abs\/2104.14690 (2021)"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Liang, X., et al.: Contrastive demonstration tuning for pre-trained language models. ArXiv abs\/2204.04392 (2022)","DOI":"10.18653\/v1\/2022.findings-emnlp.56"},{"key":"24_CR38","doi-asserted-by":"crossref","unstructured":"Hu, S., Ding, N., Wang, H., Liu, Z., Li, J.Z., Sun, M.: Knowledgeable prompt-tuning: incorporating knowledge into prompt verbalizer for text classification. In: ACL (2022)","DOI":"10.18653\/v1\/2022.acl-long.158"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of EMNLP (2013)","DOI":"10.18653\/v1\/D13-1170"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"PANG, B.: Seeing stars: Exploiting class relationships for sentiment categorization with respect to rating scales. In: Proceedings of ACL (2005)","DOI":"10.3115\/1219840.1219855"},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Pang, B., Lee, L.: A sentimental education: sentiment analysis using subjectivity summarization based on minimum cuts. In: Proceedings of ACL (2004)","DOI":"10.3115\/1218955.1218990"},{"key":"24_CR42","doi-asserted-by":"crossref","unstructured":"Hu, M., Liu, B.: Mining and summarizing customer reviews. In: Proceedings of KDD (2004)","DOI":"10.1145\/1014052.1014073"},{"key":"24_CR43","doi-asserted-by":"crossref","unstructured":"Wiebe, J., Wilson, T., Cardie, C.: Annotating expressions of opinions and emotions in language. Language resources and evaluation (2005)","DOI":"10.1007\/s10579-005-7880-9"},{"key":"24_CR44","unstructured":"Zhang, X., Zhao, J., LeCun, Y.: Character-level convolutional networks for text classification. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"24_CR45","doi-asserted-by":"crossref","unstructured":"Le\u00a0Scao, T., Rush, A.M.: How many data points is a prompt worth? In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 2627\u20132636 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.208"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-9431-7_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T15:54:09Z","timestamp":1732982049000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-9431-7_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9789819794300","9789819794317"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-9431-7_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2024\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}