{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T07:13:42Z","timestamp":1774077222077,"version":"3.50.1"},"reference-count":45,"publisher":"Association for Natural Language Processing","issue":"1","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Journal of Natural Language Processing"],"published-print":{"date-parts":[[2026]]},"DOI":"10.5715\/jnlp.33.158","type":"journal-article","created":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T22:12:53Z","timestamp":1773526373000},"page":"158-185","source":"Crossref","is-referenced-by-count":0,"title":["On the Robustness of LLM-Generated Text Detection Against Instruction Diversity"],"prefix":"10.5715","volume":"33","author":[{"given":"Ryuto","family":"Koike","sequence":"first","affiliation":[{"name":"School of Computing, Institute of Science Tokyo"}]},{"given":"Masahiro","family":"Kaneko","sequence":"additional","affiliation":[{"name":"School of Computing, Institute of Science Tokyo"},{"name":"Mohamed bin Zayed University of Artificial Intelligence (MBZUAI)"}]},{"given":"Naoaki","family":"Okazaki","sequence":"additional","affiliation":[{"name":"School of Computing, Institute of Science Tokyo"},{"name":"National Institute of Advanced Industrial Science and Technology (AIST)"},{"name":"Research and Development Center for Large Language Models, National Institute of Informatics (NII LLMC)"}]}],"member":"3685","reference":[{"key":"1","unstructured":"Bao, G., Zhao, Y., Teng, Z., Yang, L., and Zhang, Y. (2024). \u201cFast-DetectGPT: Efficient Zero-Shot Detection of Machine-Generated Text via Conditional Probability Curvature.\u201d In <i>The 12th International Conference on Learning Representations<\/i>."},{"key":"2","doi-asserted-by":"crossref","unstructured":"Beresneva, D. (2016). \u201cComputer-generated text detection using machine learning: A systematic review.\u201d In <i>21st International Conference on Applications of Natural Language to Information Systems, NLDB<\/i>, pp. 421\u2013426. Springer.","DOI":"10.1007\/978-3-319-41754-7_43"},{"key":"3","unstructured":"Chen, Z., Feng, Y., He, C., Deng, Y., Pu, H., and Li, B. (2025). \u201cIPAD: Inverse Prompt for AI Detection \u2013 A Robust and Explainable LLM-Generated Text Detector.\u201d <i>arXiv preprint arXiv:2502.15902<\/i>."},{"key":"4","doi-asserted-by":"crossref","unstructured":"Chiang, C.-H. and Lee, H.-y. (2023). \u201cCan Large Language Models Be an Alternative to Human Evaluations?\u201d In <i>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)<\/i>, pp. 15607\u201315631, Toronto, Canada. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2023.acl-long.870"},{"key":"5","doi-asserted-by":"crossref","unstructured":"Dugan, L., Hwang, A., Trhl\u00edk, F., Zhu, A., Ludan, J. M., Xu, H., Ippolito, D., and Callison-Burch, C. (2024). \u201cRAID: A Shared Benchmark for Robust Evaluation of Machine-Generated Text Detectors.\u201d In Ku, L.-W., Martins, A., and Srikumar, V. (Eds.), <i>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)<\/i>, pp. 12463\u201312492, Bangkok, Thailand. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2024.acl-long.674"},{"key":"6","unstructured":"Feng, Y., Qiang, J., Li, Y., Yuan, Y., and Zhu, Y. (2023). \u201cSentence Simplification via Large Language Models.\u201d <i>arXiv preprint arXiv:2302.11957<\/i>."},{"key":"7","unstructured":"Guide, P. E. (2024). \u201cGeneral Tips for Designing Prompts.\u201d https:\/\/www.promptingguide.ai\/introduction\/tips. Accessed: 2024-02-10."},{"key":"8","unstructured":"Guo, B., Zhang, X., Wang, Z., Jiang, M., Nie, J., Ding, Y., Yue, J., and Wu, Y. (2023). \u201cHow Close is ChatGPT to Human Experts? Comparison Corpus, Evaluation, and Detection.\u201d <i>arXiv preprint arXiv:2301.07597<\/i>."},{"key":"9","doi-asserted-by":"crossref","unstructured":"Ippolito, D., Duckworth, D., Callison-Burch, C., and Eck, D. (2020). \u201cAutomatic Detection of Generated Text is Easiest when Humans are Fooled.\u201d In <i>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics<\/i>, pp. 1808\u20131822, Online. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2020.acl-main.164"},{"key":"10","doi-asserted-by":"crossref","unstructured":"Jiang, Z., Xu, F. F., Araki, J., and Neubig, G. (2020). \u201cHow Can We Know What Language Models Know?\u201d <i>arXiv preprint arXiv:1911.12543<\/i>.","DOI":"10.1162\/tacl_a_00324"},{"key":"11","doi-asserted-by":"crossref","unstructured":"Kamalloo, E., Dziri, N., Clarke, C. L. A., and Rafiei, D. (2023). \u201cEvaluating Open-Domain Question Answering in the Era of Large Language Models.\u201d <i>arXiv preprint arXiv:2305.06984<\/i>.","DOI":"10.18653\/v1\/2023.acl-long.307"},{"key":"12","doi-asserted-by":"crossref","unstructured":"Ke, Z. and Ng, V. (2019). \u201cAutomated Essay Scoring: A Survey of the State of the Art.\u201d In Kraus, S. (Ed.), <i>Proceedings of the 28th International Joint Conference on Artificial Intelligence, IJCAI 2019, Macao, China, August 10-16, 2019<\/i>, pp. 6300\u20136308. ijcai.org.","DOI":"10.24963\/ijcai.2019\/879"},{"key":"13","unstructured":"Kirchenbauer, J., Geiping, J., Wen, Y., Katz, J., Miers, I., and Goldstein, T. (2023). \u201cA Watermark for Large Language Models.\u201d <i>arXiv preprint arXiv:2301.10226<\/i>."},{"key":"14","doi-asserted-by":"crossref","unstructured":"Koike, R., Kaneko, M., and Okazaki, N. (2024a). \u201cHow You Prompt Matters! Even Task-Oriented Constraints in Instructions Affect LLM-Generated Text Detection.\u201d In <i>Findings of the Association for Computational Linguistics: EMNLP 2024<\/i>, pp. 14384\u201314395, Miami, Florida, USA. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2024.findings-emnlp.841"},{"key":"15","doi-asserted-by":"crossref","unstructured":"Koike, R., Kaneko, M., and Okazaki, N. (2024b). \u201cOUTFOX: LLM-Generated Essay Detection Through In-Context Learning with Adversarially Generated Examples.\u201d In <i>Proceedings of the 38th AAAI Conference on Artificial Intelligence<\/i>, Vancouver, Canada.","DOI":"10.1609\/aaai.v38i19.30120"},{"key":"16","unstructured":"Krishna, K., Song, Y., Karpinska, M., Wieting, J., and Iyyer, M. (2023). \u201cParaphrasing Evades Detectors of AI-generated Text, but Retrieval is an Effective Defense.\u201d <i>arXiv preprint arXiv:2303.13408<\/i>."},{"key":"17","unstructured":"Lavergne, T., Urvoy, T., and Yvon, F. (2008). \u201cDetecting Fake Content with Relative Entropy Scoring.\u201d In <i>Proceedings of the ECAI\u201908 Workshop on Uncovering Plagiarism, Authorship and Social Software Misuse<\/i>, CEUR Workshop Proceedings."},{"key":"18","doi-asserted-by":"crossref","unstructured":"Li, J., Galley, M., Brockett, C., Gao, J., and Dolan, B. (2016). \u201cA Diversity-Promoting Objective Function for Neural Conversation Models.\u201d In <i>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies<\/i>, pp. 110\u2013119, San Diego, California. Association for Computational Linguistics.","DOI":"10.18653\/v1\/N16-1014"},{"key":"19","doi-asserted-by":"crossref","unstructured":"Li, Y., Li, Q., Cui, L., Bi, W., Wang, Z., Wang, L., Yang, L., Shi, S., and Zhang, Y. (2024). \u201cMAGE: Machine-generated Text Detection in the Wild.\u201d In Ku, L.-W., Martins, A., and Srikumar, V. (Eds.), <i>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)<\/i>, pp. 36\u201353, Bangkok, Thailand. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2024.acl-long.3"},{"key":"20","unstructured":"Liu, P., Yuan, W., Fu, J., Jiang, Z., Hayashi, H., and Neubig, G. (2021). \u201cPre-train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing.\u201d <i>arXiv preprint arXiv:2107.13586<\/i>."},{"key":"21","doi-asserted-by":"crossref","unstructured":"Liu, Y., Iter, D., Xu, Y., Wang, S., Xu, R., and Zhu, C. (2023a). \u201cG-Eval: NLG Evaluation using GPT-4 with Better Human Alignment.\u201d <i>arXiv preprint arXiv:2303.16634<\/i>.","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"22","unstructured":"Liu, Y., Zhang, Z., Zhang, W., Yue, S., Zhao, X., Cheng, X., Zhang, Y., and Hu, H. (2023b). \u201cArguGPT: evaluating, understanding and identifying argumentative essays generated by GPT models.\u201d <i>arXiv preprint arXiv:2304.07666<\/i>."},{"key":"23","unstructured":"Mitchell, E., Lee, Y., Khazatsky, A., Manning, C. D., and Finn, C. (2023). \u201cDetectGPT: Zero-Shot Machine-Generated Text Detection using Probability Curvature.\u201d <i>arXiv preprint arXiv:2301.11305<\/i>."},{"key":"24","unstructured":"Mitrovi\u0107, S., Andreoletti, D., and Ayoub, O. (2023). \u201cChatGPT or Human? Detect and Explain. Explaining Decisions of Machine Learning Model for Detecting Short ChatGPT-generated Text.\u201d <i>arXiv preprint arXiv:2301.13852<\/i>."},{"key":"25","unstructured":"OpenAI (2023a). \u201cHow can educators respond to students presenting AI-generated content as their own?\u201d https:\/\/help.openai.com\/en\/articles\/8313351-how-can-educators-respond-to-students-presenting-ai-generated-content-as-their-own. Accessed: 2023-11-10."},{"key":"26","unstructured":"OpenAI (2023b). \u201cIntroducing ChatGPT.\u201d https:\/\/openai.com\/blog\/chatgpt. Accessed on 2023-05-10."},{"key":"27","unstructured":"OpenAI (2023c). \u201cPrompt Engineering Guide.\u201d https:\/\/platform.openai.com\/docs\/guides\/prompt-engineering. Accessed: 2023-10-10."},{"key":"28","unstructured":"Park, C., Kim, H. J., Kim, J., Kim, Y., Kim, T., Cho, H., Jo, H., goo Lee, S., and Yoo, K. M. (2024). \u201cInvestigating the Influence of Prompt-Specific Shortcuts in AI Generated Text Detection.\u201d <i>arXiv preprint arXiv:2406.16275<\/i>."},{"key":"29","doi-asserted-by":"crossref","unstructured":"Rodriguez, J. D., Hay, T., Gros, D., Shamsi, Z., and Srinivasan, R. (2022). \u201cCross-Domain Detection of GPT-2-Generated Technical Text.\u201d In <i>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies<\/i>, pp. 1213\u20131233, Seattle, United States. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2022.naacl-main.88"},{"key":"30","unstructured":"Sadasivan, V. S., Kumar, A., Balasubramanian, S., Wang, W., and Feizi, S. (2023). \u201cCan AI-Generated Text be Reliably Detected?\u201d <i>arXiv preprint arXiv:2303.11156<\/i>."},{"key":"31","unstructured":"Solaiman, I., Brundage, M., Clark, J., Askell, A., Herbert-Voss, A., Wu, J., Radford, A., Krueger, G., Kim, J. W., Kreps, S., McCain, M., Newhouse, A., Blazakis, J., McGuffie, K., and Wang, J. (2019). \u201cRelease Strategies and the Social Impacts of Language Models.\u201d <i>arXiv preprint arXiv:1908.09203<\/i>."},{"key":"32","doi-asserted-by":"crossref","unstructured":"Su, J., Zhuo, T. Y., Wang, D., and Nakov, P. (2023). \u201cDetectLLM: Leveraging Log Rank Information for Zero-Shot Detection of Machine-Generated Text.\u201d <i>arXiv preprint arXiv:2306.05540<\/i>.","DOI":"10.18653\/v1\/2023.findings-emnlp.827"},{"key":"33","unstructured":"Taguchi, K., Gu, Y., and Sakurai, K. (2024). \u201cThe Impact of Prompts on Zero-Shot Detection of AI-Generated Text.\u201d <i>arXiv preprint arXiv:2403.20127<\/i>."},{"key":"34","unstructured":"Tang, R., Chuang, Y.-N., and Hu, X. (2023). \u201cThe Science of Detecting LLM-Generated Texts.\u201d <i>arXiv preprint arXiv:2303.07205<\/i>."},{"key":"35","unstructured":"Touvron, H., Martin, L., Stone, K., Albert, P., Almahairi, A., Babaei, Y., Bashlykov, N., Batra, S., Bhargava, P., Bhosale, S., et al. (2023). \u201cLlama 2: Open foundation and fine-tuned chat models.\u201d <i>arXiv preprint arXiv:2307.09288<\/i>."},{"key":"36","doi-asserted-by":"crossref","unstructured":"Uchendu, A., Le, T., Shu, K., and Lee, D. (2020). \u201cAuthorship Attribution for Neural Text Generation.\u201d In <i>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)<\/i>, pp. 8384\u20138395, Online. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2020.emnlp-main.673"},{"key":"37","doi-asserted-by":"crossref","unstructured":"Wang, Y., Feng, S., Hou, A., Pu, X., Shen, C., Liu, X., Tsvetkov, Y., and He, T. (2024a). \u201cStumbling Blocks: Stress Testing the Robustness of Machine-Generated Text Detectors Under Attacks.\u201d In Ku, L.-W., Martins, A., and Srikumar, V. (Eds.), <i>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)<\/i>, pp. 2894\u20132925, Bangkok, Thailand. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2024.acl-long.160"},{"key":"38","doi-asserted-by":"crossref","unstructured":"Wang, Y., Mansurov, J., Ivanov, P., Su, J., Shelmanov, A., Tsvigun, A., Whitehouse, C., Mohammed Afzal, O., Mahmoud, T., Sasaki, T., Arnold, T., Aji, A. F., Habash, N., Gurevych, I., and Nakov, P. (2024b). \u201cM4: Multi-generator, Multi-domain, and Multi-lingual Black-Box Machine-Generated Text Detection.\u201d In Graham, Y. and Purver, M. (Eds.), <i>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)<\/i>, pp. 1369\u20131407, St. Julian\u2019s, Malta. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2024.eacl-long.83"},{"key":"39","unstructured":"Wu, J., Yang, S., Zhan, R., Yuan, Y., Wong, D. F., and Chao, L. S. (2023). \u201cA Survey on LLM-generated Text Detection: Necessity, Methods, and Future Directions.\u201d <i>arXiv preprint arXiv:2310.14724<\/i>."},{"key":"40","doi-asserted-by":"crossref","unstructured":"Wu, J., Zhan, R., Wong, D. F., Yang, S., Yang, X., Yuan, Y., and Chao, L. S. (2024). \u201cDetectRL: Benchmarking LLM-Generated Text Detection in Real-World Scenarios.\u201d In <i>The 38 Conference on Neural Information Processing Systems Datasets and Benchmarks Track<\/i>, pp. 100369\u2013100401.","DOI":"10.52202\/079017-3186"},{"key":"41","unstructured":"Yu, X., Qi, Y., Chen, K., Chen, G., Yang, X., ZHU, P., Shang, X., Zhang, W., and Yu, N. (2024). \u201cDPIC: Decoupling Prompt and Intrinsic Characteristics for LLM Generated Text Detection.\u201d In <i>The 38th Annual Conference on Neural Information Processing Systems<\/i>."},{"key":"42","unstructured":"Yu, Y., Khan, A. R., and Xu, J. (2022). \u201cMeasuring Robustness for NLP.\u201d In <i>Proceedings of the 29th International Conference on Computational Linguistics<\/i>, pp. 3908\u20133916, Gyeongju, Republic of Korea. International Committee on Computational Linguistics."},{"key":"43","unstructured":"Zhang, B., Haddow, B., and Birch, A. (2023a). \u201cPrompting Large Language Model for Machine Translation: A Case Study.\u201d <i>arXiv preprint arXiv:2301.07069<\/i>."},{"key":"44","doi-asserted-by":"crossref","unstructured":"Zhang, T., Ladhak, F., Durmus, E., Liang, P., McKeown, K., and Hashimoto, T. B. (2023b). \u201cBenchmarking Large Language Models for News Summarization.\u201d <i>arXiv preprint arXiv:2301.13848<\/i>.","DOI":"10.1162\/tacl_a_00632"},{"key":"45","doi-asserted-by":"crossref","unstructured":"Zhao, T., Zhao, R., and Eskenazi, M. (2017). \u201cLearning Discourse-level Diversity for Neural Dialog Models using Conditional Variational Autoencoders.\u201d In <i>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)<\/i>, pp. 654\u2013664, Vancouver, Canada. Association for Computational Linguistics.","DOI":"10.18653\/v1\/P17-1061"}],"container-title":["Journal of Natural Language Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/jnlp\/33\/1\/33_158\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T03:53:19Z","timestamp":1774065199000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/jnlp\/33\/1\/33_158\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026]]}},"URL":"https:\/\/doi.org\/10.5715\/jnlp.33.158","relation":{},"ISSN":["1340-7619","2185-8314"],"issn-type":[{"value":"1340-7619","type":"print"},{"value":"2185-8314","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}