{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,24]],"date-time":"2026-06-24T05:30:52Z","timestamp":1782279052805,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,26]]},"DOI":"10.1145\/3706599.3719712","type":"proceedings-article","created":{"date-parts":[[2025,4,23]],"date-time":"2025-04-23T20:48:52Z","timestamp":1745441332000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["\"When AI Writes Personas\": Analyzing Lexical Diversity in LLM-Generated Persona Descriptions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-8708-0876","authenticated-orcid":false,"given":"Sankalp","family":"Sethi","sequence":"first","affiliation":[{"name":"College of Information Science, University of Arizona, Tucson, Arizona, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3230-0561","authenticated-orcid":false,"given":"Joni","family":"Salminen","sequence":"additional","affiliation":[{"name":"University of Vaasa, Vaasa, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7597-2267","authenticated-orcid":false,"given":"Danial","family":"Amin","sequence":"additional","affiliation":[{"name":"University of Vaasa, Vaasa, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6468-6609","authenticated-orcid":false,"given":"Bernard J","family":"Jansen","sequence":"additional","affiliation":[{"name":"Qatar Computing Research Institute, Hamad Bin Khalifa University, Doha, Qatar"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"William Babonnaud Estelle Delouche and Mounir Lahlouh. 2024. The Bias that Lies Beneath: Qualitative Uncovering of Stereotypes in Large Language Models. Swedish Artificial Intelligence Society (2024) 195\u2013203.","DOI":"10.3384\/ecp208022"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Jason Baronova Catherine Stevens Logan Tennant and Alfred MacPhee. 2024. Dynamic context-aware representation for semantic alignment in large language models. (2024).","DOI":"10.31219\/osf.io\/svcn3"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","unstructured":"Yves Bestgen. 2024. Back to Basics in Measuring Lexical Diversity: Too Simple to Be True. Applied Linguistics 45 5 (Oct. 2024) 926\u2013932. 10.1093\/applin\/amae053","DOI":"10.1093\/applin\/amae053"},{"key":"e_1_3_3_2_5_2","volume-title":"Natural language processing with Python: analyzing text with the natural language toolkit","author":"Bird Steven","year":"2009","unstructured":"Steven Bird, Ewan Klein, and Edward Loper. 2009. Natural language processing with Python: analyzing text with the natural language toolkit. \" O\u2019Reilly Media, Inc.\"."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Bart Braden. 1986. The surveyor\u2019s area formula. The College Mathematics Journal 17 4 (1986) 326\u2013337.","DOI":"10.1080\/07468342.1986.11972974"},{"key":"e_1_3_3_2_7_2","first-page":"113","volume-title":"Understanding Natural Language Understanding","author":"Cambria Erik","year":"2024","unstructured":"Erik Cambria. 2024. Semantics Processing. In Understanding Natural Language Understanding. Springer, 113\u2013228."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","unstructured":"Chris Chapman Edwin Love Russell\u00a0P. Milham Paul ElRif and James\u00a0L. Alford. 2008. Quantitative Evaluation of Personas as Information. Proceedings of the Human Factors and Ergonomics Society Annual Meeting 52 16 (Sept. 2008) 1107\u20131111. 10.1177\/154193120805201602","DOI":"10.1177\/154193120805201602"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-322-99786-9_1"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","unstructured":"Kevin\u00a0T. Cunningham and Katarina\u00a0L. Haley. 2020. Measuring Lexical Diversity for Discourse Analysis in Aphasia: Moving-Average Type\u2013Token Ratio and Word Information Measure. Journal of Speech Language and Hearing Research 63 3 (2020) 710\u2013721. 10.1044\/2019_JSLHR-19-00226","DOI":"10.1044\/2019_JSLHR-19-00226"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-78092-0_3"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","unstructured":"Gerasimos Fergadiotis Heather Wright and Thomas West. 2013. Measuring Lexical Diversity in Narrative Discourse of People With Aphasia. American Journal of Speech-Language Pathology 22 (05 2013) S397\u2013S408. 10.1044\/1058-0360(2013\/12-0083)","DOI":"10.1044\/1058-0360(2013\/12-0083"},{"key":"e_1_3_3_2_13_2","volume-title":"A designer\u2019s research manual: Succeed in design by knowing your clients and what they really need","author":"Goodman Jennifer","year":"2012","unstructured":"Jennifer Goodman and Michelle Broome. 2012. A designer\u2019s research manual: Succeed in design by knowing your clients and what they really need. Rockport Publishers."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","unstructured":"Joy Ai-Leen Goodman-Deane Mike Bradley Sam Waller and P.\u00a0John Clarkson. 2021. Developing personas to help designers to understand digital exclusion. 1 (2021) 1203\u20131212. 10.1017\/pds.2021.120 Publisher: Cambridge University Press.","DOI":"10.1017\/pds.2021.120"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1016\/B978-012566251-2\/50013-7"},{"key":"e_1_3_3_2_16_2","first-page":"144","volume-title":"Proceedings of Participation and Design Conference (PDC2002)","volume":"2","author":"Grudin Jonathan","year":"2002","unstructured":"Jonathan Grudin and John Pruitt. 2002. Personas, participatory design and product development: An infrastructure for engagement. In Proceedings of Participation and Design Conference (PDC2002) , Vol.\u00a02. Sweden, 144\u2013161."},{"key":"e_1_3_3_2_17_2","unstructured":"Yanzhu Guo Guokan Shang and Chlo\u00e9 Clavel. 2024. Benchmarking Linguistic Diversity of Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.10271 (2024)."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Hye\u00a0Seung Ha. 2019. Lexical Richness in EFL Undergraduate Students\u2019 Academic Writing. English Teaching 74 3 (2019) 3\u201328.","DOI":"10.15858\/engtea.74.3.201909.3"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580688"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300720"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Paul Hoffman Matthew\u00a0A Lambon\u00a0Ralph and Timothy\u00a0T Rogers. 2013. Semantic diversity: A measure of semantic ambiguity based on variability in the contextual usage of words. Behavior research methods 45 (2013) 718\u2013730.","DOI":"10.3758\/s13428-012-0278-x"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","unstructured":"Pei-Fang Hsu Yu-Han Lu Shih-Chu Chen and Patricia Pei-Yi Kuo. 2024. Creating and validating predictive personas for target marketing. International Journal of Human-Computer Studies 181 (2024) 103147. 10.1016\/j.ijhcs.2023.103147","DOI":"10.1016\/j.ijhcs.2023.103147"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"Laiba Husain Teresa Finlay Arqam Husain Joseph Wherton Gemma Hughes and Trisha Greenhalgh. 2024. Developing user personas to capture intersecting dimensions of disadvantage in older patients who are marginalised: a qualitative study. British Journal of General Practice 74 741 (2024) e250\u2013e257.","DOI":"10.3399\/BJGP.2023.0412"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Bernard\u00a0J Jansen Soon-gyo Jung and Joni Salminen. 2024. Finetuning analytics information systems for a better understanding of users: evidence of personification bias on multiple digital channels. Information Systems Frontiers 26 2 (2024) 775\u2013798.","DOI":"10.1007\/s10796-023-10395-5"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Bernard\u00a0J Jansen Joni Salminen Soon-gyo Jung and Kathleen Guan. 2021. Challenges of Applying Data-Driven Persona Development. Data-Driven Personas (2021) 139\u2013158.","DOI":"10.1007\/978-3-031-02231-9_6"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","unstructured":"Scott Jarvis. 2013. Capturing the Diversity in Lexical Diversity. Language Learning 63 s1 (March 2013) 87\u2013106. 10.1111\/j.1467-9922.2012.00739.x","DOI":"10.1111\/j.1467-9922.2012.00739.x"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Soon-Gyo Jung Joni Salminen Kholoud\u00a0Khalil Aldous and Bernard\u00a0J Jansen. 2025. PersonaCraft: Leveraging language models for data-driven persona development. International Journal of Human-Computer Studies 197 (2025) 103445.","DOI":"10.1016\/j.ijhcs.2025.103445"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3631700.3664882"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658975"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/2661685.2661691"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300565"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300565"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","unstructured":"Gonzalo Mart\u00ednez Jos\u00e9\u00a0Alberto Hern\u00e1ndez Javier Conde Pedro Reviriego and Elena Merino-G\u00f3mez. 2024. Beware of Words: Evaluating the Lexical Diversity of Conversational LLMs using ChatGPT as Case Study. ACM Trans. Intell. Syst. Technol. (Sept. 2024). 10.1145\/3696459 Just Accepted.","DOI":"10.1145\/3696459"},{"key":"e_1_3_3_2_34_2","unstructured":"Philip\u00a0M McCarthy. 2005. An assessment of the range and usefulness of lexical diversity measures and the potential of the measure of textual lexical diversity (MTLD). Ph.\u00a0D. Dissertation. The University of Memphis."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Philip\u00a0M McCarthy and Scott Jarvis. 2007. vocd: A theoretical and empirical evaluation. Language Testing 24 4 (2007) 459\u2013488.","DOI":"10.1177\/0265532207080767"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","unstructured":"Philip\u00a0M. McCarthy and Scott Jarvis. 2010. MTLD vocd-D and HD-D: A validation study of sophisticated approaches to lexical diversity assessment. Behavior Research Methods 42 2 (May 2010) 381\u2013392. 10.3758\/BRM.42.2.381","DOI":"10.3758\/BRM.42.2.381"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-58527-2_7"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Farooq Mubarak Reima Suomi and Satu-P\u00e4ivi Kantola. 2020. Confirming the links between socio-economic variables and digitalization worldwide: the unsettled debate on digital divide. Journal of Information Communication and Ethics in Society 18 3 (2020) 415\u2013430.","DOI":"10.1108\/JICES-02-2019-0021"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300880"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-4084-9"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-85616-8_20"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/2556288.2557080"},{"key":"e_1_3_3_2_43_2","unstructured":"Naseela Pervez and Alexander\u00a0J. Titus. 2024. Inclusivity in Large Language Models: Personality Traits and Gender Bias in Scientific Abstracts. arxiv:https:\/\/arXiv.org\/abs\/2406.19497\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2406.19497"},{"key":"e_1_3_3_2_44_2","unstructured":"Alexander Pfaff. 2024. How to measure syntactic diversity: Patternization methods algorithms. Noun phrases in early Germanic languages (2024) 33\u201370."},{"key":"e_1_3_3_2_45_2","unstructured":"Esther Ploeger Huiyuan Lai Rik van Noord and Antonio Toral. 2024. Towards Tailored Recovery of Lexical Diversity in Literary Machine Translation. arxiv:https:\/\/arXiv.org\/abs\/2408.17308\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2408.17308"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613905.3636293"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.5555\/1766311.1766341"},{"key":"e_1_3_3_2_48_2","unstructured":"Cynthia Putnam Emma\u00a0J. Rose Erica\u00a0J. Johnson and Beth\u00a0E. Kolko. 2009. Adapting User-Centered Design Methods to Design for Diverse Populations. Information Technologies and International Development 5 (2009) 51\u201374. https:\/\/api.semanticscholar.org\/CorpusID:55242592"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.1109\/VIS54172.2023.00056"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"crossref","unstructured":"Pedro Reviriego Javier Conde Elena Merino-G\u00f3mez Gonzalo Mart\u00ednez and Jos\u00e9\u00a0Alberto Hern\u00e1ndez. 2024. Playing with words: Comparing the vocabulary and lexical diversity of ChatGPT and humans. Machine Learning with Applications 18 (2024) 100602.","DOI":"10.1016\/j.mlwa.2024.100602"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","unstructured":"Joni Salminen Kamal Chhirang Soon-Gyo Jung Saravanan Thirumuruganathan Kathleen\u00a0W. Guan and Bernard\u00a0J. Jansen. 2022. Big Data Small Personas: How Algorithms Shape the Demographic Representation of Data-Driven User Segments. (2022). 10.1089\/big.2021.0177 Publisher: Mary Ann Liebert Inc. publishers 140 Huguenot Street 3rd Floor New Rochelle NY 10801 USA.","DOI":"10.1089\/big.2021.0177"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-48057-7_14"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-50334-5_6"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/3546155.3546654"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642036"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-48038-6_18"},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613905.3650860"},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-37131-84"},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"publisher","unstructured":"Lucas Shen. 2022. LexicalRichness: A small module to compute textual lexical richness. 10.5281\/zenodo.6607007","DOI":"10.5281\/zenodo.6607007"},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643834.3660729"},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"publisher","unstructured":"Phillip\u00a0Douglas Stevenson and Christopher\u00a0Andrew Mattson. 2019. The Personification of Big Data. Proceedings of the Design Society: International Conference on Engineering Design 1 1 (July 2019) 4019\u20134028. 10.1017\/dsi.2019.409","DOI":"10.1017\/dsi.2019.409"},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.25"},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"crossref","unstructured":"Phil Turner and Susan Turner. 2011. Is stereotyping inevitable when designing with personas? Design studies 32 1 (2011) 30\u201344.","DOI":"10.1016\/j.destud.2010.06.002"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"crossref","unstructured":"Fiona\u00a0J. Tweedie and R.\u00a0Harald Baayen. 1998. How Variable May a Constant Be? Measures of Lexical Richness in Perspective. Computers and the Humanities 32 5 (1998) 323\u2013352. https:\/\/www.jstor.org\/stable\/30200474 Publisher: Springer.","DOI":"10.1023\/A:1001749303137"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"crossref","unstructured":"Ji\u00a0Seung Yang Carly Rosvold and Nan Bernstein\u00a0Ratner. 2022. Measurement of lexical diversity in children\u2019s spoken language: Computational and conceptual considerations. Frontiers in psychology 13 (2022) 905789.","DOI":"10.3389\/fpsyg.2022.905789"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN60899.2024.10651409"},{"key":"e_1_3_3_2_67_2","unstructured":"Wanwan Zheng and Mingzhe Jin. 2024. Evaluate Lexical Richness Measures Using Coefficient of Variation and Relative Value. (2024). http:\/\/www.cicling.org\/2018\/intranet\/pre-print\/papers\/paper_1.pdf Working paper. Accessed: 29-Dec-2024."}],"event":{"name":"CHI EA '25: Extended Abstracts of the CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI EA '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the Extended Abstracts of the CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706599.3719712","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706599.3719712","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:43Z","timestamp":1750295923000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706599.3719712"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":66,"alternative-id":["10.1145\/3706599.3719712","10.1145\/3706599"],"URL":"https:\/\/doi.org\/10.1145\/3706599.3719712","relation":{},"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"2025-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}