{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T14:03:17Z","timestamp":1770818597136,"version":"3.50.1"},"reference-count":92,"publisher":"Oxford University Press (OUP)","issue":"3","license":[{"start":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T00:00:00Z","timestamp":1717632000000},"content-version":"vor","delay-in-days":1,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100019779","name":"Qatar National Library","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100019779","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>This study explores the feasibility of cross-linguistic authorship attribution and the author\u2019s gender identification using Machine Translation (MT). Computational stylistics experiments were conducted on a Greek blog corpus translated into English using Google\u2019s Neural MT. A Random Forest algorithm was employed for authorship and gender profiling, using different feature groups [Author\u2019s Multilevel N-gram Profiles, quantitative linguistics (QL), and cross-lingual word embeddings (CLWE)] in both original and translated texts. Results indicate that MT is a viable method for converting a multilingual corpus into one language for authorship attribution and gender profiling research, with considerable accuracy when training and testing datasets use identical language. In the pure cross-linguistic scenario, higher accuracies than the baselines were obtained using CLWE and QL features.<\/jats:p>","DOI":"10.1093\/llc\/fqae028","type":"journal-article","created":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T06:13:48Z","timestamp":1717654428000},"page":"954-967","source":"Crossref","is-referenced-by-count":5,"title":["Cross-linguistic authorship attribution and gender profiling. Machine translation as a method for bridging the language gap"],"prefix":"10.1093","volume":"39","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4093-5973","authenticated-orcid":false,"given":"George","family":"Mikros","sequence":"first","affiliation":[{"name":"Department of Middle Eastern Studies, Hamad Bin Khalifa University , Doha, Qatar"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4113-3311","authenticated-orcid":false,"given":"Dimitris","family":"Boumparis","sequence":"additional","affiliation":[{"name":"University of Antwerp , Antwerp, Belgium"}]}],"member":"286","published-online":{"date-parts":[[2024,6,5]]},"reference":[{"key":"2024090307515476400_fqae028-B1","author":"Alroobaea","year":"2020"},{"key":"2024090307515476400_fqae028-B2","first-page":"226","volume-title":"Speech and Computer. SPECOM 2015","author":"Aravantinou","year":"2015"},{"key":"2024090307515476400_fqae028-B3","author":"Argamon","year":"2005"},{"key":"2024090307515476400_fqae028-B4","doi-asserted-by":"crossref","first-page":"802","DOI":"10.1002\/asi.20553","article-title":"Stylistic Text Classification Using Functional Lexical Features","volume":"58","author":"Argamon","year":"2007","journal-title":"Journal of American Society for Information Science and Technology"},{"key":"2024090307515476400_fqae028-B5","doi-asserted-by":"crossref","first-page":"597","DOI":"10.1162\/tacl_a_00288","article-title":"Massively Multilingual Sentence Embeddings for Zero-Shot Cross-Lingual Transfer and Beyond\u2019,","volume":"7","author":"Artetxe","year":"2019","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"2024090307515476400_fqae028-B6","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1007\/978-3-030-86337-1_15","volume-title":"Document Analysis and Recognition\u2014ICDAR 2021","author":"Badirli","year":"2021"},{"key":"2024090307515476400_fqae028-B7","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1111\/josl.12080","article-title":"Gender Identity and Lexical Variation in Social Media\u2019,","volume":"18","author":"Bamman","year":"2014","journal-title":"Journal of Sociolinguistics"},{"key":"2024090307515476400_fqae028-B8","doi-asserted-by":"crossref","first-page":"255","DOI":"10.1007\/978-3-030-49161-1_22","volume-title":"Artificial Intelligence Applications and Innovations","author":"Barlas","year":"2020"},{"key":"2024090307515476400_fqae028-B9","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1093\/llc\/fqi039","article-title":"A New Approach to the Study of Translationese: Machine-learning the Difference between Original and Translated Text\u2019,","volume":"21","author":"Baroni","year":"2005","journal-title":"Literary and Linguistic Computing"},{"key":"2024090307515476400_fqae028-B10","doi-asserted-by":"crossref","first-page":"507","DOI":"10.1080\/016909697386628","article-title":"On the Inseparability of Grammar and the Lexicon: Evidence from the Acquisition, Aphasia and Real-time Processing\u2019,","volume":"12","author":"Bates","year":"1997","journal-title":"Language and Cognitive Processes"},{"key":"2024090307515476400_fqae028-B11","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511541933","volume-title":"Power Analysis for Experimental Research: A Practical Guide for the Biological, Medical and Social Sciences","author":"Bausell","year":"2002"},{"key":"2024090307515476400_fqae028-B12","first-page":"382","author":"Bayot","year":"2016"},{"key":"2024090307515476400_fqae028-B13","volume-title":"Spotting Translationese. A Corpus-Driven Approach Using Support Vector Machines","author":"Bernardini","year":"2005"},{"key":"2024090307515476400_fqae028-B14","first-page":"2015","author":"Bogdanova","year":"2014"},{"key":"2024090307515476400_fqae028-B15","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1162\/tacl_a_00051","article-title":"Enriching Word Vectors with Subword Information\u2019,","volume":"5","author":"Bojanowski","year":"2017","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"2024090307515476400_fqae028-B16","volume-title":"Identifying Crosswriters\u2019 Altering Style in Books for Children and Adults Using Supervised Machine Learning","author":"Boumparis","year":"."},{"key":"2024090307515476400_fqae028-B17","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1010933404324","article-title":"Random Forests\u2019,","volume":"45","author":"Breiman","year":"2001","journal-title":"Machine Learning"},{"key":"2024090307515476400_fqae028-B18","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1093\/applin\/24.2.197","article-title":"Lexical Richness in the Spontaneous Speech of Bilinguals\u2019,","volume":"24","author":"Daller","year":"2003","journal-title":"Applied Linguistics"},{"key":"2024090307515476400_fqae028-B19","doi-asserted-by":"crossref","first-page":"610","DOI":"10.1108\/OIR-09-2015-0308","article-title":"Utilizing Facebook Pages of the Political Parties to Automatically Predict the Political Orientation of Facebook Users\u2019,","volume":"40","author":"David","year":"2016","journal-title":"Online Information Review"},{"key":"2024090307515476400_fqae028-B20","author":"Dias","year":"2018"},{"key":"2024090307515476400_fqae028-B21","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s40734-015-0017-4","article-title":"Lexical Diversity in Parkinson\u2019s Disease\u2019,","volume":"2","author":"Ellis","year":"2015","journal-title":"Journal of Clinical Movement Disorders"},{"key":"2024090307515476400_fqae028-B22","first-page":"127","author":"Fabien","year":"2020"},{"key":"2024090307515476400_fqae028-B23","first-page":"878","author":"Feng","year":"2022"},{"key":"2024090307515476400_fqae028-B24","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1044\/1058-0360(2013\/12-0083)","article-title":"Measuring Lexical Diversity in Narrative Discourse of People with Aphasia\u2019,","volume":"22","author":"Fergadiotis","year":"2013","journal-title":"American Journal of Speech-Language Pathology"},{"key":"2024090307515476400_fqae028-B25","author":"Franco-Salvador","year":"2017"},{"key":"2024090307515476400_fqae028-B26","doi-asserted-by":"crossref","first-page":"106","DOI":"10.1075\/babel.31.2.19tra","article-title":"Translation. Literary, Linguistic, and Philosophical Perspectives\u2019,","volume":"31","author":"Frawley","year":"1985","journal-title":"Babel"},{"key":"2024090307515476400_fqae028-B27","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1093\/llc\/fql048","article-title":"Function Words in Authorship Attribution Studies\u2019,","volume":"22","author":"Garc\u00eda","year":"2007","journal-title":"Literary and Linguistic Computing"},{"key":"2024090307515476400_fqae028-B28","first-page":"88","author":"Gellerstam","year":"1986"},{"key":"2024090307515476400_fqae028-B29","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1093\/llc\/fqm020","article-title":"Quantitative Authorship Attribution: An Evaluation of Techniques\u2019,","volume":"22","author":"Grieve","year":"2007","journal-title":"Literary and Linguistic Computing"},{"key":"2024090307515476400_fqae028-B30","doi-asserted-by":"crossref","first-page":"16569","DOI":"10.1073\/pnas.0507655102","article-title":"An Index to Quantify an Individual\u2019s Scientific Research Output\u2019,","volume":"102","author":"Hirsch","year":"2005","journal-title":"Proceedings of the National Academy of Sciences of the United States of America"},{"key":"2024090307515476400_fqae028-B31","first-page":"274","author":"Hoenen","year":"2017"},{"key":"2024090307515476400_fqae028-B32","first-page":"255","article-title":"Authorship Identification Ising Random Forests\u2019,","volume":"55","author":"Jin","year":"2007","journal-title":"Proceedings of the Institute of Statistical Mathematics"},{"key":"2024090307515476400_fqae028-B33","first-page":"427","author":"Joulin","year":"2017"},{"key":"2024090307515476400_fqae028-B34","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1002\/asi.24073","article-title":"A Comparative Assessment of the Difficulty of Authorship Attribution in Greek and in English\u2019,","volume":"70","author":"Juola","year":"2019","journal-title":"Journal of the Association for Information Science and Technology"},{"key":"2024090307515476400_fqae028-B35","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1080\/09296174.2018.1458395","article-title":"Correlations and Potential Cross-Linguistic Indicators of Writing Style\u2019,","volume":"26","author":"Juola","year":"2019","journal-title":"Journal of Quantitative Linguistics"},{"key":"2024090307515476400_fqae028-B36","first-page":"59","author":"Kestemont","year":"2014"},{"key":"2024090307515476400_fqae028-B37","doi-asserted-by":"crossref","first-page":"401","DOI":"10.1093\/llc\/17.4.401","article-title":"Automatically Categorizing Written Texts by Author Gender\u2019,","volume":"17","author":"Koppel","year":"2002","journal-title":"Literary and Linguistic Computing"},{"key":"2024090307515476400_fqae028-B38","volume-title":"QUITA: Quantitative Index Text Analyzer","author":"Kub\u00e1t","year":"2014"},{"key":"2024090307515476400_fqae028-B39","first-page":"177","volume-title":"Applied Soft Computing: Tecniques and Applications","author":"Kumar","year":"2022"},{"key":"2024090307515476400_fqae028-B40","author":"Lample","year":"2018"},{"key":"2024090307515476400_fqae028-B41","first-page":"111","article-title":"Stylometric Comparative Analysis of Style in Human vs. Machine Literary Translations\u2019,","volume":"20","author":"Lee","year":"2019","journal-title":"The Journal of Translation Studies"},{"key":"2024090307515476400_fqae028-B42","author":"Loh","year":"2016"},{"key":"2024090307515476400_fqae028-B43","first-page":"1","volume-title":"Modelling and Assessing Vocabulary Knowledge","author":"Long","year":"2007"},{"key":"2024090307515476400_fqae028-B44","doi-asserted-by":"crossref","first-page":"102227","DOI":"10.1016\/j.ipm.2020.102227","article-title":"Richer Document Embeddings for Author Profiling Tasks Based on a Heuristic Search\u2019,","volume":"57","author":"L\u00f3pez-Santill\u00e1n","year":"2020","journal-title":"Information Processing & Management"},{"key":"2024090307515476400_fqae028-B45","author":"Lundeqvist","year":"2017"},{"key":"2024090307515476400_fqae028-B46","first-page":"513","author":"Luyckx","year":"2008"},{"key":"2024090307515476400_fqae028-B47","author":"McCollister","year":"2016"},{"key":"2024090307515476400_fqae028-B48","doi-asserted-by":"crossref","first-page":"392","DOI":"10.2307\/1932674","article-title":"An Index of Diversity and the Relation of Certain Concepts to Diversity\u2019,","volume":"48","author":"McIntosh","year":"1967","journal-title":"Ecology"},{"key":"2024090307515476400_fqae028-B49","first-page":"71","volume-title":"Words and Numbers. In Memory of Peter Grzybek (1957\u20132019)","author":"Mikros","year":"2020"},{"key":"2024090307515476400_fqae028-B50","first-page":"85","author":"Mikros","year":"2018"},{"key":"2024090307515476400_fqae028-B51","first-page":"21","volume-title":"Methods and Applications of Quantitative Linguistics in Belgrade, Serbia, April 16-19, 2012","author":"Mikros","year":"2013"},{"key":"2024090307515476400_fqae028-B52","first-page":"206","volume-title":"Issues in Quantitative Linguistics 3. Dedicated to Karl-Heinz Best on the Occasion of his 70th Birthday","author":"Mikros","year":"2013"},{"key":"2024090307515476400_fqae028-B53","first-page":"17","author":"Mikros","year":"2013"},{"key":"2024090307515476400_fqae028-B54","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1515\/9783110420296-008","volume-title":"Recent Contributions to Quantitative Linguistics","author":"Mikros","year":"2015"},{"key":"2024090307515476400_fqae028-B55","article-title":"Surveying Stylometry Techniques and Applications\u2019,","volume":"50","author":"Neal","year":"2017","journal-title":"ACM Computing Surveys"},{"key":"2024090307515476400_fqae028-B56","volume-title":"Handbook of Semiotics","author":"N\u00f6th","year":"1995"},{"key":"2024090307515476400_fqae028-B57","volume-title":"The Language of Weblogs: A Study of Genre and Individual Differences","author":"Nowson","year":"2006"},{"key":"2024090307515476400_fqae028-B58","volume-title":"Statistics for Corpus Linguistics","author":"Oakes","year":"1998"},{"key":"2024090307515476400_fqae028-B59","doi-asserted-by":"crossref","first-page":"927","DOI":"10.1044\/1092-4388(2002\/075)","article-title":"Lexical Diversity in the Spontaneous Speech of Children with Specific Language Impairment: Application of D\u2019,","volume":"45","author":"Owen","year":"2002","journal-title":"Journal of Speech Language and Hearing Research"},{"key":"2024090307515476400_fqae028-B60","doi-asserted-by":"crossref","first-page":"555","DOI":"10.1515\/9783110894219.555","volume-title":"Exact Methods in the Study of Language and Text","author":"Popescu","year":"2007"},{"key":"2024090307515476400_fqae028-B61","volume-title":"Word Frequency Studies","author":"Popescu","year":"2009"},{"key":"2024090307515476400_fqae028-B62","first-page":"71","article-title":"Writer\u2019s View of Text Generation\u2019,","volume":"15","author":"Popescu","year":"2007","journal-title":"Glottometrics"},{"key":"2024090307515476400_fqae028-B63","first-page":"58","article-title":"On the Dynamics of Word Classes in Text\u2019,","volume":"14","author":"Popescu","year":"2007","journal-title":"Glottometrics"},{"key":"2024090307515476400_fqae028-B64","volume-title":"Vectors and Codes of Text","author":"Popescu","year":"2010"},{"key":"2024090307515476400_fqae028-B65","doi-asserted-by":"crossref","first-page":"627","DOI":"10.1007\/s00500-016-2446-x","article-title":"Application of the Distributed Document Representation in the Authorship Attribution Task for Small Corpora\u2019,","volume":"21","author":"Posadas-Dur\u00e1n","year":"2017","journal-title":"Soft Computing"},{"key":"2024090307515476400_fqae028-B66","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1016\/S0167-6393(98)00018-1","article-title":"A Study of N-gram and Decision Tree Letter Language Modeling Methods\u2019,","volume":"24","author":"Potamianos","year":"1998","journal-title":"Speech Communication"},{"key":"2024090307515476400_fqae028-B67","author":"Rangel","year":"2013"},{"key":"2024090307515476400_fqae028-B68","doi-asserted-by":"crossref","first-page":"44","DOI":"10.5120\/ijca2017914587","article-title":"Authorship Attribution on Imbalanced English Editorial Corpora\u2019,","volume":"169","author":"Rao","year":"2017","journal-title":"International Journal of Computer Applications"},{"key":"2024090307515476400_fqae028-B69","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511732942","volume-title":"Assessing Vocabulary","author":"Read","year":"2000"},{"key":"2024090307515476400_fqae028-B70","author":"Ruder","year":"2016"},{"key":"2024090307515476400_fqae028-B71","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1080\/13556509.2011.10799478","article-title":"Translator Style\u2019,","volume":"17","author":"Saldanha","year":"2011","journal-title":"The Translator"},{"key":"2024090307515476400_fqae028-B72","volume-title":"Neural and Non-neural Approaches to Authorship Attribution","author":"Sari","year":"2018"},{"key":"2024090307515476400_fqae028-B73","author":"Schaetti","year":"2017"},{"key":"2024090307515476400_fqae028-B74","first-page":"669","author":"Shrestha","year":"2017"},{"key":"2024090307515476400_fqae028-B75","first-page":"483","article-title":"Behavioral Profiling in Translation Studies\u2019,","volume":"8","author":"Smynor","year":"2015","journal-title":"Trans-Kom Zeitschrift F\u00fcr Translationswissenschaft Und Fachkommunikation"},{"key":"2024090307515476400_fqae028-B76","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1007\/978-3-319-98932-7_25","volume-title":"Experimental IR Meets Multilinguality, Multimodality, and Interaction","author":"Stamatatos","year":"2018"},{"key":"2024090307515476400_fqae028-B77","first-page":"1306","volume-title":"The SAGE Encyclopedia of Communication Research Methods","author":"Stoll","year":"2017"},{"key":"2024090307515476400_fqae028-B78","first-page":"1217","article-title":"Authorship Attribution of Cell-phone E-mail\u2019,","volume":"17","author":"Tanaka","year":"2014","journal-title":"International Journal on Information (Japan)"},{"key":"2024090307515476400_fqae028-B79","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1177\/1367006910381186","article-title":"Operationalizing and Measuring Language Dominance\u2019,","volume":"15","author":"Treffers-Daller","year":"2011","journal-title":"International Journal of Bilingualism"},{"key":"2024090307515476400_fqae028-B80","volume-title":"Language Dominance in Bilinguals: Issues of Measurement and Operationalization","author":"Treffers-Daller","year":"2015"},{"key":"2024090307515476400_fqae028-B81","first-page":"178","volume-title":"Proceedings of the International AAAI Conference on Web and Social Media","author":"Tumasjan","year":"2010"},{"key":"2024090307515476400_fqae028-B82","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1023\/A:1001749303137","article-title":"How Variable May a Constant Be? Measures of Lexical Richness in Perspective\u2019,","volume":"32","author":"Tweedie","year":"1998","journal-title":"Computers and the Humanities"},{"key":"2024090307515476400_fqae028-B83","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1080\/09296170500055350","article-title":"New Machine Learning Methods Demonstrate the Existence of a Human Stylome\u2019,","volume":"12","author":"van Halteren","year":"2005","journal-title":"Journal of Quantitative Linguistics"},{"key":"2024090307515476400_fqae028-B84","doi-asserted-by":"crossref","first-page":"192","DOI":"10.1016\/j.cortex.2013.10.010","article-title":"Data Modelling in Corpus Linguistics: How Low May WeGgo\u2019,","volume":"55","author":"van Velzen","year":"2014","journal-title":"Cortex"},{"key":"2024090307515476400_fqae028-B85","author":"Veenhoven","year":"2018"},{"key":"2024090307515476400_fqae028-B86","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2021.107815","article-title":"Exploring Syntactic and Semantic Features for Authorship Attribution\u2019,","volume":"111","author":"Wu","year":"2021","journal-title":"Applied Soft Computing"},{"key":"2024090307515476400_fqae028-B87","author":"Wu","year":"2016"},{"key":"2024090307515476400_fqae028-B88","first-page":"325","volume-title":"Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing","author":"Xu","year":"2004"},{"key":"2024090307515476400_fqae028-B89","volume-title":"The Statistical Study of Literary Vocabulary","author":"Yule","year":"1944"},{"key":"2024090307515476400_fqae028-B90","first-page":"649","author":"Zhang","year":"2015"},{"key":"2024090307515476400_fqae028-B91","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1007\/11562382_14","volume-title":"Information Retrieval Technology","author":"Zhao","year":"2005"},{"key":"2024090307515476400_fqae028-B92","doi-asserted-by":"crossref","first-page":"e1584","DOI":"10.1002\/wics.1584","article-title":"A Review on Authorship Attribution in Text Mining\u2019,","volume":"15","author":"Zheng","year":"2022","journal-title":"WIREs Computational Statistics"}],"container-title":["Digital Scholarship in the Humanities"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/dsh\/article-pdf\/39\/3\/954\/58997734\/fqae028.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/dsh\/article-pdf\/39\/3\/954\/58997734\/fqae028.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,3]],"date-time":"2024-09-03T12:33:47Z","timestamp":1725366827000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/dsh\/article\/39\/3\/954\/7688491"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,5]]},"references-count":92,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2024,6,5]]},"published-print":{"date-parts":[[2024,9,1]]}},"URL":"https:\/\/doi.org\/10.1093\/llc\/fqae028","relation":{},"ISSN":["2055-7671","2055-768X"],"issn-type":[{"value":"2055-7671","type":"print"},{"value":"2055-768X","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,9]]},"published":{"date-parts":[[2024,6,5]]}}}