{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:39:57Z","timestamp":1767321597091,"version":"3.48.0"},"publisher-location":"Singapore","reference-count":37,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819541577","type":"print"},{"value":"9789819541584","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-4158-4_6","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:37:00Z","timestamp":1767321420000},"page":"88-105","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["RBLU: A Benchmark to\u00a0Evaluate the\u00a0Reverse Inference Ability of\u00a0Large Language Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5085-6574","authenticated-orcid":false,"given":"Haowei","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0100-0320","authenticated-orcid":false,"given":"Fan","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7489-4439","authenticated-orcid":false,"given":"Sudi","family":"Xia","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1858-1961","authenticated-orcid":false,"given":"Liyi","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4657-3256","authenticated-orcid":false,"given":"Xingshen","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"issue":"3","key":"6_CR1","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1016\/j.cognition.2009.07.005","volume":"113","author":"CL Baker","year":"2009","unstructured":"Baker, C.L., Saxe, R., Tenenbaum, J.B.: Action understanding as inverse planning. Cognition 113(3), 329\u2013349 (2009). https:\/\/doi.org\/10.1016\/j.cognition.2009.07.005","journal-title":"Cognition"},{"key":"6_CR2","unstructured":"Berglund, L., et al.: The reversal curse: LLMs trained on \u201ca is B\u201d fail to learn \u201cB is a\u201d, May 2024"},{"key":"6_CR3","unstructured":"Bunescu, I.: Qa_legal_dataset_val (2024). https:\/\/huggingface.co\/datasets\/ibunescu\/qa_legal_dataset_val"},{"issue":"3","key":"6_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3641289","volume":"15","author":"Y Chang","year":"2024","unstructured":"Chang, Y., Wang, X., Wang, J., Wu, Y., Yang, L., Zhu, K., Chen, H., Yi, X., Wang, C., Wang, Y., Ye, W., Zhang, Y., Chang, Y., Yu, P.S., Yang, Q., Xie, X.: A survey on evaluation of large language models. ACM Trans. Intell. Syst. Technol. 15(3), 1\u201345 (2024). https:\/\/doi.org\/10.1145\/3641289","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"6_CR5","doi-asserted-by":"publisher","unstructured":"Choi, E., et al.: QuAC: question answering in context. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 2174\u20132184. Association for Computational Linguistics (2018). https:\/\/doi.org\/10.18653\/v1\/D18-1241, Brussels, Belgium","DOI":"10.18653\/v1\/D18-1241"},{"key":"6_CR6","doi-asserted-by":"publisher","unstructured":"Clark, C., Lee, K., Chang, M.W., Kwiatkowski, T., Collins, M., Toutanova, K.: BoolQ: exploring the surprising difficulty of natural yes\/no questions. In: Proceedings of the 2019 Conference of the North, pp. 2924\u20132936. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1300, Minneapolis, Minnesota","DOI":"10.18653\/v1\/N19-1300"},{"key":"6_CR7","doi-asserted-by":"publisher","unstructured":"Creswell, A., Shanahan, M., Higgins, I.: Selection-inference: Exploiting large language models for interpretable logical reasoning, May 2022. https:\/\/doi.org\/10.48550\/arXiv.2205.09712","DOI":"10.48550\/arXiv.2205.09712"},{"key":"6_CR8","doi-asserted-by":"publisher","unstructured":"Dua, D., Wang, Y., Dasigi, P., Stanovsky, G., Singh, S., Gardner, M.: DROP a reading comprehension benchmark requiring discrete reasoning over paragraphs. In: Proceedings of the 2019 Conference of the North, pp. 2368\u20132378. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1246, Minneapolis, Minnesota","DOI":"10.18653\/v1\/N19-1246"},{"key":"6_CR9","unstructured":"Ehghaghi, M.: Malikeh1375\/medical-question-answering-datasets (2024). https:\/\/huggingface.co\/datasets\/Malikeh1375\/medical-question-answering-datasets"},{"key":"6_CR10","doi-asserted-by":"publisher","unstructured":"Fu, Y., Ou, L., Chen, M., Wan, Y., Peng, H., Khot, T.: Chain-of-thought hub: A continuous effort to measure large language models\u2019 reasoning performance, May 2023. https:\/\/doi.org\/10.48550\/arXiv.2305.17306","DOI":"10.48550\/arXiv.2305.17306"},{"key":"6_CR11","unstructured":"GLM, T., et al.: Chatglm: a family of large language models from glm-130b to glm-4 all tools (2024)"},{"key":"6_CR12","unstructured":"Hendrycks, D., Burns, C., Basart, S., Zou, A., Mazeika, M., Song, D., Steinhardt, J.: Measuring massive multitask language understanding. In: Proceedings of the International Conference on Learning Representations (ICLR) (2021)"},{"key":"6_CR13","unstructured":"Huang, Y., et al.: C-eval: A multi-level multi-discipline chinese evaluation suite for foundation models (2023). https:\/\/arxiv.org\/abs\/2305.08322"},{"key":"6_CR14","doi-asserted-by":"publisher","unstructured":"Joshi, M., Choi, E., Weld, D., Zettlemoyer, L.: TriviaQA: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension. In: Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1601\u20131611. Association for Computational Linguistics (2017). https:\/\/doi.org\/10.18653\/v1\/P17-1147, Vancouver, Canada","DOI":"10.18653\/v1\/P17-1147"},{"key":"6_CR15","unstructured":"Lawrence, S.: Winddude\/reddit_finance_43_250k, July 2023. https:\/\/huggingface.co\/datasets\/winddude\/reddit_finance_43_250k"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Li, H., et al.: Cmmlu: measuring massive multitask language understanding in Chinese (2024). https:\/\/arxiv.org\/abs\/2306.09212","DOI":"10.18653\/v1\/2024.findings-acl.671"},{"key":"6_CR17","unstructured":"Lin, C.Y.: Rouge: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381. Association for Computational Linguistics (2004), Barcelona, Spain"},{"key":"6_CR18","doi-asserted-by":"publisher","unstructured":"Liu, X., et al.: Large language models and causal inference in collaboration: a comprehensive survey, Mar 2024. https:\/\/doi.org\/10.48550\/arXiv.2403.09606","DOI":"10.48550\/arXiv.2403.09606"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Nayab, S., Rossolini, G., Buttazzo, G., Manes, N., Giacomelli, F.: Concise thoughts: Impact of output length on LLM reasoning and cost, July 2024","DOI":"10.2139\/ssrn.5293076"},{"key":"6_CR20","doi-asserted-by":"publisher","unstructured":"Piaget, J.: The Origins of Intelligence in Children. The Origins of Intelligence in Children, W W Norton & Co (1952). https:\/\/doi.org\/10.1037\/11494-000, New York, NY, US","DOI":"10.1037\/11494-000"},{"key":"6_CR21","doi-asserted-by":"publisher","unstructured":"Rajpurkar, P., Zhang, J., Lopyrev, K., Liang, P.: SQuAD: 100,000+ questions for machine comprehension of text. In: Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, pp. 2383\u20132392. Association for Computational Linguistics (2016). https:\/\/doi.org\/10.18653\/v1\/D16-1264, Austin, Texas","DOI":"10.18653\/v1\/D16-1264"},{"key":"6_CR22","doi-asserted-by":"publisher","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 3980\u20133990. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/D19-1410, Hong Kong, China","DOI":"10.18653\/v1\/D19-1410"},{"key":"6_CR23","doi-asserted-by":"publisher","unstructured":"Sakaguchi, K., Bras, R.L., Bhagavatula, C., Choi, Y.: WinoGrande: An adversarial winograd schema challenge at scale. Commun. ACM 64(9), 99\u2013106 (2021). https:\/\/doi.org\/10.1145\/3474381, New York, NY, USA","DOI":"10.1145\/3474381"},{"key":"6_CR24","doi-asserted-by":"publisher","unstructured":"Shen, S., Logeswaran, L., Lee, M., Lee, H., Poria, S., Mihalcea, R.: Understanding the capabilities and limitations of large language models for cultural commonsense. In: Duh, K., Gomez, H., Bethard, S. (eds.) Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 5668\u20135680. Association for Computational Linguistics (Jun 2024). https:\/\/doi.org\/10.18653\/v1\/2024.naacl-long.316, Mexico City, Mexico","DOI":"10.18653\/v1\/2024.naacl-long.316"},{"key":"6_CR25","unstructured":"Singhal, K., et al.: Large language models encode clinical knowledge (2022)"},{"key":"6_CR26","doi-asserted-by":"publisher","unstructured":"Srivastava, A., Rastogi, A., Rao, A., et\u00a0al.: Beyond the imitation game: Quantifying and extrapolating the capabilities of language models, June 2023. https:\/\/doi.org\/10.48550\/arXiv.2206.04615","DOI":"10.48550\/arXiv.2206.04615"},{"key":"6_CR27","doi-asserted-by":"publisher","unstructured":"Talmor, A., Herzig, J., Lourie, N., Berant, J.: COMMONSENSEQA: a question answering challenge targeting commonsense knowledge. In: Proceedings of the 2019 Conference of the North, pp. 4149\u20134158. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1421, Minneapolis, Minnesota","DOI":"10.18653\/v1\/N19-1421"},{"key":"6_CR28","unstructured":"Wang, A., et al.: Superglue: a stickier benchmark for general-purpose language understanding systems (2020). https:\/\/arxiv.org\/abs\/1905.00537"},{"key":"6_CR29","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: Glue: A multi-task benchmark and analysis platform for natural language understanding (2019). https:\/\/arxiv.org\/abs\/1804.07461"},{"key":"6_CR30","unstructured":"Wang, H.: Wanghw\/human-ai-comparison (2024). https:\/\/huggingface.co\/datasets\/wanghw\/human-ai-comparison"},{"key":"6_CR31","doi-asserted-by":"publisher","unstructured":"Wang, Y., et al.: MMLU-pro: a more robust and challenging multi-task language understanding benchmark, June 2024. https:\/\/doi.org\/10.48550\/arXiv.2406.01574","DOI":"10.48550\/arXiv.2406.01574"},{"issue":"1","key":"6_CR32","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1108\/MEQ-10-2020-0222","volume":"33","author":"M Wilson","year":"2022","unstructured":"Wilson, M., Paschen, J., Pitt, L.: The circular economy meets artificial intelligence (AI): understanding the opportunities of AI for reverse logistics. Manage. Environ. Quality Int. J. 33(1), 9\u201325 (2022). https:\/\/doi.org\/10.1108\/MEQ-10-2020-0222","journal-title":"Manage. Environ. Quality Int. J."},{"issue":"9","key":"6_CR33","doi-asserted-by":"publisher","first-page":"2080","DOI":"10.1007\/s10964-024-01994-9","volume":"53","author":"X Wu","year":"2024","unstructured":"Wu, X., Liu, H., Xiao, L., Yao, M.: Reciprocal relationship between learning interest and learning persistence: Roles of strategies for self-regulated learning behaviors and academic performance. J. Youth Adolesc. 53(9), 2080\u20132096 (2024). https:\/\/doi.org\/10.1007\/s10964-024-01994-9","journal-title":"J. Youth Adolesc."},{"key":"6_CR34","unstructured":"Yang, A., et al.: Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)"},{"key":"6_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Cai, H., Song, X., Chen, Y., Sun, R., Zheng, J.: Reverse chain: A generic-rule for LLMs to master multi-API planning, February 2024","DOI":"10.18653\/v1\/2024.findings-naacl.22"},{"key":"6_CR36","doi-asserted-by":"publisher","unstructured":"Zhong, M., et al.: QMSum: a new benchmark for query-based multi-domain meeting summarization. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 5905\u20135921. Association for Computational Linguistics (2021). https:\/\/doi.org\/10.18653\/v1\/2021.naacl-main.472, Online","DOI":"10.18653\/v1\/2021.naacl-main.472"},{"key":"6_CR37","doi-asserted-by":"crossref","unstructured":"Zhong, W., et al.: AGIEval: A human-centric benchmark for evaluating foundation models, September 2023","DOI":"10.18653\/v1\/2024.findings-naacl.149"}],"container-title":["Lecture Notes in Computer Science","Database Systems for Advanced Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-4158-4_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:37:03Z","timestamp":1767321423000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-4158-4_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819541577","9789819541584"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-4158-4_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DASFAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database Systems for Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 May 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 May 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasfaa2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/dasfaa2025.github.io","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}