{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T06:34:16Z","timestamp":1763793256709,"version":"3.45.0"},"publisher-location":"Singapore","reference-count":43,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819533428","type":"print"},{"value":"9789819533435","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T00:00:00Z","timestamp":1763856000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T00:00:00Z","timestamp":1763856000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3343-5_40","type":"book-chapter","created":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T06:30:35Z","timestamp":1763793035000},"page":"516-528","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["TianWen: A Comprehensive Benchmark for\u00a0Evaluating LLMs in\u00a0Chinese Classical Poetry Understanding 
and\u00a0Reasoning"],"prefix":"10.1007","author":[{"given":"Zhenwu","family":"Pei","sequence":"first","affiliation":[]},{"given":"Rongbo","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Xuefeng","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Kehai","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yingjie","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Andong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Min","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,23]]},"reference":[{"key":"40_CR1","doi-asserted-by":"publisher","unstructured":"Agirrezabal, M., Oliveira, H.G., Ormazabal, A.: Erato: automatizing poetry evaluation. In: EPIA Conference on Artificial Intelligence, pp. 3\u201314. Springer (2023). https:\/\/doi.org\/10.1007\/978-3-031-49011-8_1","DOI":"10.1007\/978-3-031-49011-8_1"},{"key":"40_CR2","unstructured":"Anand, Y., Nussbaum, Z., Duderstadt, B., Schmidt, B., Mulyar, A.: GPT4all: training an assistant-style chatbot with large scale data distillation from GPT-3.5-turbo. GitHub (2023). https:\/\/github.com\/nomic-ai\/gpt4all"},{"key":"40_CR3","doi-asserted-by":"publisher","first-page":"51358","DOI":"10.52202\/079017-1626","volume":"37","author":"J Cao","year":"2024","unstructured":"Cao, J., Liu, Y., Shi, Y., Ding, K., Jin, L.: Wenmind: a comprehensive benchmark for evaluating large language models in Chinese classical literature and language arts. Adv. Neural. Inf. Process. Syst. 37, 51358\u201351410 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"40_CR4","doi-asserted-by":"crossref","unstructured":"Chen, H., Yi, X., Sun, M., Li, W., Yang, C., Guo, Z.: Sentiment-controllable Chinese poetry generation. In: IJCAI, pp. 
4925\u20134931 (2019)","DOI":"10.24963\/ijcai.2019\/684"},{"key":"40_CR5","doi-asserted-by":"crossref","unstructured":"Fu, J., Ng, S.K., Jiang, Z., Liu, P.: GPTScore: evaluate as you desire. arXiv preprint arXiv:2302.04166 (2023)","DOI":"10.18653\/v1\/2024.naacl-long.365"},{"key":"40_CR6","unstructured":"Grattafiori, A., et\u00a0al.: The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)"},{"key":"40_CR7","doi-asserted-by":"publisher","first-page":"434","DOI":"10.1162\/tacl_a_00469","volume":"10","author":"J Guan","year":"2022","unstructured":"Guan, J., et al.: Lot: a story-centric benchmark for evaluating Chinese long text understanding and generation. Trans. Assoc. Comput. Linguist. 10, 434\u2013451 (2022)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"40_CR8","unstructured":"Guo, D., et\u00a0al.: DeepSeek-R1: incentivizing reasoning capability in LLMS via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)"},{"key":"40_CR9","doi-asserted-by":"publisher","unstructured":"Guo, G., Yang, J., Lu, F., Qin, J., Tang, T., Zhao, W.X.: Towards effective ancient chinese translation: dataset, model, and evaluation. In: CCF International Conference on Natural Language Processing and Chinese Computing, pp. 416\u2013427. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-44696-2_33","DOI":"10.1007\/978-3-031-44696-2_33"},{"key":"40_CR10","doi-asserted-by":"crossref","unstructured":"Hu, Z., Liu, C., Feng, Y., Luu, A.T., Hooi, B.: PoetryDiffusion: towards joint semantic and metrical manipulation in poetry generation. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 18279\u201318288 (2024)","DOI":"10.1609\/aaai.v38i16.29787"},{"key":"40_CR11","first-page":"62991","volume":"36","author":"Y Huang","year":"2023","unstructured":"Huang, Y., et al.: C-Eval: a multi-level multi-discipline Chinese evaluation suite for foundation models. Adv. Neural. Inf. Process. Syst. 36, 62991\u201363010 (2023)","journal-title":"Adv. 
Neural. Inf. Process. Syst."},{"key":"40_CR12","unstructured":"Hurst, A., et\u00a0al.: GPT-4o system card. arXiv preprint arXiv:2410.21276 (2024)"},{"key":"40_CR13","doi-asserted-by":"publisher","unstructured":"Ji, Z., Shen, Y., Sun, Y., Yu, T., Wang, X.: C-CLUE: a benchmark of classical chinese based on a crowdsourcing system for knowledge graph construction. In: Qin, B., Jin, Z., Wang, H., Pan, J., Liu, Y., An, B. (eds.) CCKS 2021. CCIS, vol. 1466, pp. 295\u2013301. Springer, Singapore (2021). https:\/\/doi.org\/10.1007\/978-981-16-6471-7_24","DOI":"10.1007\/978-981-16-6471-7_24"},{"key":"40_CR14","doi-asserted-by":"crossref","unstructured":"Kurokawa, R., et al.: Diagnostic performances of CLAUDE 3 opus and CLAUDE 3.5 sonnet from patient history and key images in radiology\u2019s \u201cdiagnosis please\u201d cases. Jpn. J. Radiol. 42, 1\u20134 (2024)","DOI":"10.1007\/s11604-024-01634-z"},{"key":"40_CR15","doi-asserted-by":"crossref","unstructured":"Kwon, W., et al.: Efficient memory management for large language model serving with PagedAttention. In: Proceedings of the 29th Symposium on Operating Systems Principles, pp. 611\u2013626 (2023)","DOI":"10.1145\/3600006.3613165"},{"key":"40_CR16","doi-asserted-by":"crossref","unstructured":"Li, H., et al.: CMMLU: measuring massive multitask language understanding in Chinese. In: Findings of the Association for Computational Linguistics ACL 2024, pp. 11260\u201311285 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.671"},{"key":"40_CR17","unstructured":"Li, W., Qi, F., Sun, M., Yi, X., Zhang, J.: CCPM: a Chinese classical poetry matching dataset. arXiv preprint arXiv:2106.01979 (2021)"},{"key":"40_CR18","doi-asserted-by":"crossref","unstructured":"Li, X., Chen, K., Long, Y., Bai, X., Xu, Y., Zhang, M.: Generator-assistant stepwise rollback framework for large language model agent. 
arXiv preprint arXiv:2503.02519 (2025)","DOI":"10.18653\/v1\/2025.emnlp-main.892"},{"key":"40_CR19","unstructured":"Liu, A., et\u00a0al.: DeepSeek-V3 technical report. arXiv preprint arXiv:2412.19437 (2024)"},{"key":"40_CR20","unstructured":"Liu, X., et al.: AlignBench: benchmarking Chinese alignment of large language models. In: ACL (1) (2024)"},{"issue":"6","key":"40_CR21","first-page":"1252","volume":"57","author":"Y Liu","year":"2020","unstructured":"Liu, Y., Wu, B., Bai, T.: The construction and analysis of classical Chinese poetry knowledge graph. J. Comput. Res. Dev. 57(6), 1252 (2020)","journal-title":"J. Comput. Res. Dev."},{"key":"40_CR22","unstructured":"Narzary, S., Brahma, B., Mahilary, H., Brahma, M., Som, B., Nandi, S.: Comparative study of zero-shot cross-lingual transfer for BODO POS and NER tagging using GEMINI 2.0 flash thinking experimental model. arXiv preprint arXiv:2503.04405 (2025)"},{"issue":"6","key":"40_CR23","doi-asserted-by":"publisher","first-page":"929","DOI":"10.1017\/S1351324917000171","volume":"23","author":"HG Oliveira","year":"2017","unstructured":"Oliveira, H.G., Herv\u00e1s, R., D\u00edaz, A., Gerv\u00e1s, P.: Multilingual extension and evaluation of a poetry generator. Nat. Lang. Eng. 23(6), 929\u2013967 (2017)","journal-title":"Nat. Lang. Eng."},{"key":"40_CR24","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"40_CR25","unstructured":"Penedo, G., et al.: The refinedWeb dataset for falcon LLM: outperforming curated corpora with web data, and web data only. 
arXiv preprint arXiv:2306.01116 (2023)"},{"key":"40_CR26","doi-asserted-by":"crossref","unstructured":"Shao, Y., Shao, T., Wang, M., Wang, P., Gao, J.: A sentiment and style controllable approach for Chinese poetry generation. In: Proceedings of the 30th ACM international conference on information & knowledge management, pp. 4784\u20134788 (2021)","DOI":"10.1145\/3459637.3481964"},{"key":"40_CR27","unstructured":"Su, Q., Wang, Y., Deng, Z., Yang, H., Wang, J.: CCL23-EVAL(guner2023)(overview of ccl23-EVAL task 1: Named entity recognition in ancient Chinese books). In: Proceedings CCL 2023 (Vol. 3: Evaluations), pp. 34\u201340 (2023)"},{"key":"40_CR28","unstructured":"THUDM: GLM-4-9b-chat. https:\/\/huggingface.co\/THUDM\/glm-4-9b-chat (2024). Accessed 4 July 2025"},{"key":"40_CR29","unstructured":"Toivanen, J.: Corpus-based generation of content and form in poetry. In: Conference on Computational Creativity (ICCC), vol.\u00a030, p.\u00a005 (2012)"},{"key":"40_CR30","unstructured":"Wang, A., et al.: Superglue: a stickier benchmark for general-purpose language understanding systems. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"40_CR31","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: Glue: a multi-task benchmark and analysis platform for natural language understanding. In: International Conference on Learning Representations (2018)","DOI":"10.18653\/v1\/W18-5446"},{"key":"40_CR32","unstructured":"Wang, D., et\u00a0al.: Evahan2023: overview of the first international ancient Chinese translation bakeoff. In: Proceedings of ALT2023: Ancient Language Translation Workshop, pp. 1\u201314 (2023)"},{"key":"40_CR33","doi-asserted-by":"crossref","unstructured":"Wang, S., Wong, D.F., Yao, J., Chao, L.S.: What is the best way for ChatGPT to translate poetry? 
arXiv preprint arXiv:2406.03450 (2024)","DOI":"10.18653\/v1\/2024.acl-long.756"},{"key":"40_CR34","unstructured":"Wang, Z., Zhang, J., Ma, J.: A method to judge the style of classical poetry based on pre-trained model. arXiv preprint arXiv:2211.04657 (2022)"},{"key":"40_CR35","unstructured":"Yang, A., et\u00a0al.: Qwen2.5 technical report. arXiv preprint arXiv:2412.15115 (2024)"},{"key":"40_CR36","unstructured":"Yao, Y., et\u00a0al.: CUGE: a Chinese language understanding and generation evaluation benchmark. arXiv preprint arXiv:2112.13610 (2021)"},{"key":"40_CR37","unstructured":"Zeng, H.: Measuring massive multitask Chinese understanding. arXiv preprint arXiv:2304.12986 (2023)"},{"key":"40_CR38","unstructured":"Zhang, H., Chen, K., Bai, X., Li, X., Xiang, Y., Zhang, M.: Exploring translation mechanism of large language models. arXiv preprint arXiv:2502.11806 (2025)"},{"key":"40_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, N., et\u00a0al.: CBLUE: a Chinese biomedical language understanding evaluation benchmark. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 7888\u20137915 (2022)","DOI":"10.18653\/v1\/2022.acl-long.544"},{"key":"40_CR40","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: BERTScore: evaluating text generation with BERT. arXiv preprint arXiv:1904.09675 (2019)"},{"key":"40_CR41","unstructured":"Zhang, Y., Li, H.: Can large language model comprehend ancient Chinese? A preliminary test on ACLUE. In: Proceedings of the Ancient Language Processing Workshop, pp. 80\u201387 (2023)"},{"key":"40_CR42","doi-asserted-by":"crossref","unstructured":"Zhong, M., Liu, L., Chen, K., Yang, M., Zhang, M.: Context consistency between training and inference in simultaneous machine translation. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 
13465\u201313476 (2024)","DOI":"10.18653\/v1\/2024.acl-long.727"},{"key":"40_CR43","doi-asserted-by":"crossref","unstructured":"Zhou, B., Chen, Q., Wang, T., Zhong, X., Zhang, Y.: WYWEB: a NLP evaluation benchmark for classical Chinese. arXiv preprint arXiv:2305.14150 (2023)","DOI":"10.18653\/v1\/2023.findings-acl.204"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3343-5_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T06:30:53Z","timestamp":1763793053000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3343-5_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,23]]},"ISBN":["9789819533428","9789819533435"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3343-5_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,23]]},"assertion":[{"value":"23 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference 
Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2025\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}