{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:18:18Z","timestamp":1750306698402,"version":"3.41.0"},"reference-count":44,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T00:00:00Z","timestamp":1746144000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Natural Language Processing Journal"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1016\/j.nlp.2025.100151","type":"journal-article","created":{"date-parts":[[2025,5,15]],"date-time":"2025-05-15T16:01:49Z","timestamp":1747324909000},"page":"100151","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Understanding the effects of human-written paraphrases in LLM-generated text detection"],"prefix":"10.1016","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7085-1800","authenticated-orcid":false,"given":"Hiu Ting","family":"Lau","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4583-3623","authenticated-orcid":false,"given":"Arkaitz","family":"Zubiaga","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"3","key":"10.1016\/j.nlp.2025.100151_b1","article-title":"The question answering systems: A survey","volume":"2","author":"Allam","year":"2012","journal-title":"Int. J. Res. Rev. Inf. Sci. (IJRRIS)"},{"year":"2024","series-title":"Do large language models discriminate in hiring decisions on the basis of race, ethnicity, and gender?","author":"An","key":"10.1016\/j.nlp.2025.100151_b2"},{"year":"2023","series-title":"Sentence_similarity_semantic_search","author":"Ansari","key":"10.1016\/j.nlp.2025.100151_b3"},{"key":"10.1016\/j.nlp.2025.100151_b4","series-title":"International Conference on Intelligent Computing and Networking","first-page":"545","article-title":"Generative artificial intelligence: Opportunities and challenges of large language models","author":"Barreto","year":"2023"},{"key":"10.1016\/j.nlp.2025.100151_b5","doi-asserted-by":"crossref","unstructured":"Bender,\u00a0E.M., Gebru,\u00a0T., McMillan-Major,\u00a0A., Shmitchell,\u00a0S., 2021. On the dangers of stochastic parrots: Can language models be too big?. In: Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency. pp. 610\u2013623.","DOI":"10.1145\/3442188.3445922"},{"key":"10.1016\/j.nlp.2025.100151_b6","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"year":"2019","series-title":"PARAPHRASE \u2014 meaning in the Cambridge English Dictionary","author":"Cambridge Dictionary","key":"10.1016\/j.nlp.2025.100151_b7"},{"key":"10.1016\/j.nlp.2025.100151_b8","article-title":"Parrot: Paraphrase generation for nlu","author":"Damodaran","year":"2021","journal-title":"Parrot: Paraphrase Gener. Nlu"},{"key":"10.1016\/j.nlp.2025.100151_b9","unstructured":"Dolan,\u00a0B., Brockett,\u00a0C., 2005. Automatically Constructing a Corpus of Sentential Paraphrases. In: Third International Workshop on Paraphrasing. IWP2005, URL: https:\/\/www.microsoft.com\/en-us\/research\/publication\/automatically-constructing-a-corpus-of-sentential-paraphrases\/."},{"key":"10.1016\/j.nlp.2025.100151_b10","doi-asserted-by":"crossref","unstructured":"Dou,\u00a0Y., Jiang,\u00a0C., Xu,\u00a0W., 2022. Improving Large-scale Paraphrase Acquisition and Generation. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing. pp. 9301\u20139323.","DOI":"10.18653\/v1\/2022.emnlp-main.631"},{"key":"10.1016\/j.nlp.2025.100151_b11","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2020.113679","article-title":"Automatic text summarization: A comprehensive survey","volume":"165","author":"El-Kassas","year":"2021","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.nlp.2025.100151_b12","doi-asserted-by":"crossref","DOI":"10.7717\/peerj-cs.443","article-title":"Feature-based detection of automated language models: tackling GPT-2, GPT-3 and grover","volume":"7","author":"Fr\u00f6hling","year":"2021","journal-title":"PeerJ Comput. Sci."},{"key":"10.1016\/j.nlp.2025.100151_b13","doi-asserted-by":"crossref","unstructured":"Gao,\u00a0T., Yao,\u00a0X., Chen,\u00a0D., 2021. SimCSE: Simple Contrastive Learning of Sentence Embeddings. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing. pp. 6894\u20136910.","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"issue":"28","key":"10.1016\/j.nlp.2025.100151_b14","doi-asserted-by":"crossref","first-page":"43089","DOI":"10.1007\/s11042-023-15224-0","article-title":"A systematic survey on automated text generation tools and techniques: application, evaluation, and challenges","volume":"82","author":"Goyal","year":"2023","journal-title":"Multimedia Tools Appl.","ISSN":"https:\/\/id.crossref.org\/issn\/1380-7501","issn-type":"print"},{"key":"10.1016\/j.nlp.2025.100151_b15","doi-asserted-by":"crossref","unstructured":"Hutchinson,\u00a0B., Prabhakaran,\u00a0V., Denton,\u00a0E., Webster,\u00a0K., Zhong,\u00a0Y., Denuyl,\u00a0S., 2020. Social Biases in NLP Models as Barriers for Persons with Disabilities. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. pp. 5491\u20135501.","DOI":"10.18653\/v1\/2020.acl-main.487"},{"issue":"1","key":"10.1016\/j.nlp.2025.100151_b16","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1111\/beer.12479","article-title":"Ethical implications of text generation in the age of artificial intelligence","volume":"32","author":"Illia","year":"2023","journal-title":"Bus. Ethics Environ. Responsib."},{"key":"10.1016\/j.nlp.2025.100151_b17","series-title":"CS & IT Conference Proceedings, vol. 14, no. 4","article-title":"Parafusion: A large-scale LLM-driven english paraphrase dataset infused with high-quality lexical and syntactic diversity","author":"Jayawardena","year":"2024"},{"key":"10.1016\/j.nlp.2025.100151_b18","series-title":"International Conference on Machine Learning","first-page":"17061","article-title":"A watermark for large language models","author":"Kirchenbauer","year":"2023"},{"issue":"1","key":"10.1016\/j.nlp.2025.100151_b19","doi-asserted-by":"crossref","first-page":"104","DOI":"10.1017\/XPS.2020.37","article-title":"All the news that\u2019s fit to fabricate: AI-generated text as a tool of media misinformation","volume":"9","author":"Kreps","year":"2022","journal-title":"J. Exp. Political Sci."},{"key":"10.1016\/j.nlp.2025.100151_b20","article-title":"Paraphrasing evades detectors of ai-generated text, but retrieval is an effective defense","volume":"36","author":"Krishna","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"year":"2019","series-title":"BART: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension","author":"Lewis","key":"10.1016\/j.nlp.2025.100151_b21"},{"year":"2024","series-title":"Leveraging large language models for nlg evaluation: A survey","author":"Li","key":"10.1016\/j.nlp.2025.100151_b22"},{"key":"10.1016\/j.nlp.2025.100151_b23","doi-asserted-by":"crossref","unstructured":"Lin,\u00a0S., Hilton,\u00a0J., Evans,\u00a0O., 2022. TruthfulQA: Measuring How Models Mimic Human Falsehoods. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). pp. 3214\u20133252.","DOI":"10.18653\/v1\/2022.acl-long.229"},{"issue":"3","key":"10.1016\/j.nlp.2025.100151_b24","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1380584.1380586","article-title":"Statistical machine translation","volume":"40","author":"Lopez","year":"2008","journal-title":"ACM Comput. Surv."},{"year":"2020","series-title":"Abhimishra91\/transformers-tutorials","author":"Mishra","key":"10.1016\/j.nlp.2025.100151_b25"},{"key":"10.1016\/j.nlp.2025.100151_b26","series-title":"International Conference on Machine Learning","first-page":"24950","article-title":"Detectgpt: Zero-shot machine-generated text detection using probability curvature","author":"Mitchell","year":"2023"},{"issue":"4","key":"10.1016\/j.nlp.2025.100151_b27","first-page":"344","article-title":"Search and learning for unsupervised text generation","volume":"43","author":"Mou","year":"2022","journal-title":"AI Mag."},{"key":"10.1016\/j.nlp.2025.100151_b28","series-title":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","article-title":"Don\u2019t give me the details, just the summary! topic-aware convolutional neural networks for extreme summarization","author":"Narayan","year":"2018"},{"year":"2023","series-title":"New and improved embedding model","author":"OpenAI","key":"10.1016\/j.nlp.2025.100151_b29"},{"year":"2016","series-title":"Reddit news users more likely to be male, Young and digital in their news preferences","author":"Pew Research Center\u2019s Journalism Project","key":"10.1016\/j.nlp.2025.100151_b30"},{"key":"10.1016\/j.nlp.2025.100151_b31","series-title":"International Conference on Machine Learning","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","author":"Radford","year":"2023"},{"key":"10.1016\/j.nlp.2025.100151_b32","doi-asserted-by":"crossref","DOI":"10.1007\/s11042-024-18359-w","article-title":"Identification of paraphrased text in research articles through improved embeddings and fine-tuned BERT model","volume":"83","author":"Razaq","year":"2024","journal-title":"Multimedia Tools Appl."},{"year":"2023","series-title":"Can AI-generated text be reliably detected?","author":"Sadasivan","key":"10.1016\/j.nlp.2025.100151_b33"},{"year":"2023","series-title":"30+ Google bard statistics 2023 (trends & demographics)","author":"Shewale","key":"10.1016\/j.nlp.2025.100151_b34"},{"year":"2019","series-title":"Release strategies and the social impacts of language models","author":"Solaiman","key":"10.1016\/j.nlp.2025.100151_b35"},{"year":"2023","series-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","key":"10.1016\/j.nlp.2025.100151_b36"},{"year":"2019","series-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding","author":"Wang","key":"10.1016\/j.nlp.2025.100151_b37"},{"year":"2020","series-title":"Attacking neural text detectors","author":"Wolff","key":"10.1016\/j.nlp.2025.100151_b38"},{"key":"10.1016\/j.nlp.2025.100151_b39","article-title":"Defending against neural fake news","volume":"32","author":"Zellers","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.nlp.2025.100151_b40","series-title":"International Conference on Machine Learning","first-page":"41092","article-title":"Prompting large language model for machine translation: A case study","author":"Zhang","year":"2023"},{"key":"10.1016\/j.nlp.2025.100151_b41","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1162\/tacl_a_00632","article-title":"Benchmarking large language models for news summarization","volume":"12","author":"Zhang","year":"2024","journal-title":"Trans. Assoc. Comput. Linguist."},{"year":"2022","series-title":"Opt: Open pre-trained transformer language models","author":"Zhang","key":"10.1016\/j.nlp.2025.100151_b42"},{"key":"10.1016\/j.nlp.2025.100151_b43","doi-asserted-by":"crossref","DOI":"10.1109\/TIFS.2024.3455775","article-title":"Silent guardian: Protecting text from malicious exploitation by large language models","author":"Zhao","year":"2024","journal-title":"IEEE Trans. Inf. Forensics Secur."},{"key":"10.1016\/j.nlp.2025.100151_b44","doi-asserted-by":"crossref","DOI":"10.3389\/frai.2023.1350306","article-title":"Natural language processing in the era of large language models","volume":"6","author":"Zubiaga","year":"2024","journal-title":"Front. Artif. Intell."}],"container-title":["Natural Language Processing Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2949719125000275?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2949719125000275?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T07:03:29Z","timestamp":1750230209000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S2949719125000275"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":44,"alternative-id":["S2949719125000275"],"URL":"https:\/\/doi.org\/10.1016\/j.nlp.2025.100151","relation":{},"ISSN":["2949-7191"],"issn-type":[{"type":"print","value":"2949-7191"}],"subject":[],"published":{"date-parts":[[2025,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Understanding the effects of human-written paraphrases in LLM-generated text detection","name":"articletitle","label":"Article Title"},{"value":"Natural Language Processing Journal","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.nlp.2025.100151","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"100151"}}