{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T11:50:11Z","timestamp":1748001011760,"version":"3.37.3"},"reference-count":43,"publisher":"Oxford University Press (OUP)","issue":"2","license":[{"start":{"date-parts":[[2024,3,25]],"date-time":"2024-03-25T00:00:00Z","timestamp":1711324800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001823","name":"Czech Ministry of Education, Youth and Sports","doi-asserted-by":"crossref","award":["IGA_FF_2022_020"],"award-info":[{"award-number":["IGA_FF_2022_020"]}],"id":[{"id":"10.13039\/501100001823","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>Our work aims to evaluate the strength of the association between function words and several text types: novels, poems, academic articles, reviews, and blog posts, and the accuracy of their classification to these categories, through machine-learning and statistical methods. The principal conclusion is that the types of texts are distinguishable based only on the function words, either by vocabulary or vocabulary diversity. Such findings may impact the techniques of authorship attribution based on function words and text clustering techniques since some function words add information about the text types\/genres, in addition to content words.<\/jats:p>","DOI":"10.1093\/llc\/fqae013","type":"journal-article","created":{"date-parts":[[2024,3,25]],"date-time":"2024-03-25T23:24:06Z","timestamp":1711409046000},"page":"765-789","source":"Crossref","is-referenced-by-count":1,"title":["Beyond content: discriminatory power of function words in text type classification"],"prefix":"10.1093","volume":"39","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-6441-7795","authenticated-orcid":false,"given":"Kl\u00e1ra","family":"Vengla\u0159ov\u00e1","sequence":"first","affiliation":[{"name":"Department of General Linguistics, Palack\u00fd University , Olomouc, 779 00, Czech Republic"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4111-7382","authenticated-orcid":false,"given":"Vladim\u00edr","family":"Matlach","sequence":"additional","affiliation":[{"name":"Department of General Linguistics, Palack\u00fd University , Olomouc, 779 00, Czech Republic"}]}],"member":"286","published-online":{"date-parts":[[2024,3,25]]},"reference":[{"key":"2024061809544876000_fqae013-B1","doi-asserted-by":"publisher","DOI":"10.1515\/text.2003.014","article-title":"Gender, Genre, and Writing Style in Formal Written Texts\u2019,","volume":"23","author":"Argamon","year":"2003","journal-title":"Text"},{"first-page":"1","year":"2005","author":"Argamon","key":"2024061809544876000_fqae013-B2"},{"year":"2003","author":"Argamon","key":"2024061809544876000_fqae013-B3","doi-asserted-by":"publisher","DOI":"10.1145\/956750.956805"},{"key":"2024061809544876000_fqae013-B4","doi-asserted-by":"publisher","first-page":"e0181142","DOI":"10.1371\/journal.pone.0181142","article-title":"\u2018What Is Relevant in a Text Document\u2019: An Interpretable Machine Learning Approach\u2019,","volume":"12","author":"Arras","year":"2017","journal-title":"PLoS One"},{"first-page":"69","year":"2002","author":"Baayen","key":"2024061809544876000_fqae013-B5"},{"key":"2024061809544876000_fqae013-B6","first-page":"4","article-title":"A Review of Machine Learning Algorithms for Text-Documents Classification\u2019,","author":"Baharudin","year":"2010","journal-title":"Journal of Advances in Information Technology"},{"key":"2024061809544876000_fqae013-B7","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1515\/ling.1989.27.1.3","article-title":"A Typology of English Texts\u2019,","volume":"27","author":"Biber","year":"1989","journal-title":"Linguistics"},{"volume-title":"Variation across Speech and Writing","year":"1991","author":"Biber","key":"2024061809544876000_fqae013-B8"},{"key":"2024061809544876000_fqae013-B9","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1080\/09332480.2003.10554843","article-title":"Who Wrote the 15th Book of Oz? An Application of Multivariate Analysis to Authorship Attribution\u2019,","volume":"16","author":"Binongo","year":"2003","journal-title":"Chance"},{"key":"2024061809544876000_fqae013-B10","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1093\/llc\/9.4.267","article-title":"Joaquin\u2019s Joaquinesquerie, Joaquinesquerie\u2019s Joaquin: A Statistical Expression of a Filipino Writer\u2019s Style\u2019,","volume":"9","author":"Binongo","year":"1994","journal-title":"Literary and Linguistic Computing"},{"volume-title":"Natural Language Processing with Python: Analyzing Text with the Natural Language Toolkit","year":"2009","author":"Bird","key":"2024061809544876000_fqae013-B11"},{"key":"2024061809544876000_fqae013-B12","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1093\/llc\/17.3.267","article-title":"\u2018Delta\u2019: A Measure of Stylistic Difference and a Guide to Likely Authorship\u2019,","volume":"17","author":"Burrows","year":"2002","journal-title":"Literary and Linguistic Computing"},{"first-page":"142","key":"2024061809544876000_fqae013-B13","doi-asserted-by":"publisher","DOI":"10.3115\/977035.977055"},{"year":"2004","author":"De Roeck","key":"2024061809544876000_fqae013-B14"},{"key":"2024061809544876000_fqae013-B15","doi-asserted-by":"publisher","first-page":"1506","DOI":"10.1002\/asi.20427","article-title":"Learning to Classify Documents According to Genre\u2019,","volume":"57","author":"Finn","year":"2006","journal-title":"Journal of the American Society for Information Science and Technology"},{"key":"2024061809544876000_fqae013-B16","first-page":"7","article-title":"Brown Corpus Manual\u2019,","volume":"5","author":"Francis","year":"1979","journal-title":"Letters to the Editor"},{"year":"2000","author":"Graham","key":"2024061809544876000_fqae013-B17"},{"year":"2005","author":"Gupta","key":"2024061809544876000_fqae013-B18"},{"key":"2024061809544876000_fqae013-B19","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1080\/00437956.1954.11659520","article-title":"Distributional Structure\u2019,","volume":"10","author":"Harris","year":"1954","journal-title":"Word"},{"key":"2024061809544876000_fqae013-B20","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1007\/BF01830689","article-title":"Authorship Attribution\u2019,","volume":"28","author":"Holmes","year":"1994","journal-title":"Computers and the Humanities"},{"year":"2017","author":"Honnibal","key":"2024061809544876000_fqae013-B21"},{"year":"1998","author":"Joachims","key":"2024061809544876000_fqae013-B22","doi-asserted-by":"publisher","DOI":"10.17877\/DE290R-5097"},{"key":"2024061809544876000_fqae013-B23","doi-asserted-by":"publisher","first-page":"750","DOI":"10.1016\/j.poetic.2013.08.005","article-title":"Significant Themes in 19th-Century Literature\u2019,","volume":"41","author":"Jockers","year":"2013","journal-title":"Poetics"},{"year":"1994","author":"Karlgren","key":"2024061809544876000_fqae013-B24"},{"first-page":"59","year":"2014","author":"Kestemont","key":"2024061809544876000_fqae013-B25"},{"key":"2024061809544876000_fqae013-B26","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1007\/978-3-540-30115-8_22","volume-title":"Machine Learning: ECML 2004","author":"Klimt","year":"2004"},{"key":"2024061809544876000_fqae013-B27","doi-asserted-by":"publisher","first-page":"1519","DOI":"10.1002\/asi.20428","article-title":"Feature Instability as a Criterion for Selecting Potential Style Markers\u2019,","volume":"57","author":"Koppel","year":"2006","journal-title":"Journal of the American Society for Information Science and Technology"},{"key":"2024061809544876000_fqae013-B28","first-page":"2579","article-title":"Viualizing Data Using T-SNE\u2019,","volume":"9","author":"van der Maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"2024061809544876000_fqae013-B29","doi-asserted-by":"publisher","first-page":"897","DOI":"10.1145\/2488388.2488466","author":"McAuley","year":"2013"},{"first-page":"29","year":"2007","author":"Mikros","key":"2024061809544876000_fqae013-B30"},{"key":"2024061809544876000_fqae013-B31","doi-asserted-by":"publisher","first-page":"370","DOI":"10.1016\/S0019-9958(58)90229-8","article-title":"Length-Frequency Statistics for Written English\u2019,","volume":"1","author":"Miller","year":"1958","journal-title":"Information and Control"},{"key":"2024061809544876000_fqae013-B32","first-page":"275","article-title":"Inference in an Authorship Problem\u2019,","volume":"58","author":"Mosteller","year":"1963","journal-title":"Journal of the American Statistical Association"},{"key":"2024061809544876000_fqae013-B33","doi-asserted-by":"crossref","first-page":"10","DOI":"10.30564\/ese.v3i1.3170","article-title":"A New Model for Automatic Text Classification\u2019,","volume":"3","author":"Moumivand","year":"2021","journal-title":"Electrical Science & Engineering"},{"year":"2023","author":"PLoS","key":"2024061809544876000_fqae013-B34"},{"year":"2022","author":"poetryfoundation.org","key":"2024061809544876000_fqae013-B35"},{"key":"2024061809544876000_fqae013-B36","first-page":"37","article-title":"Evaluation: From Precision, Recall and f-Measure to Roc., Informedness, Markedness & Correlation\u2019,","volume":"2","author":"Powers","year":"2011","journal-title":"Journal of Machine Learning Technologies"},{"year":"2023","author":"Project Gutenberg","key":"2024061809544876000_fqae013-B37"},{"first-page":"199","year":"2006","author":"Schler","key":"2024061809544876000_fqae013-B38"},{"year":"2017","author":"Tatman","key":"2024061809544876000_fqae013-B39"},{"year":"2023","author":"Textual Optics Lab","key":"2024061809544876000_fqae013-B40"},{"key":"2024061809544876000_fqae013-B41","first-page":"774","article-title":"Pattern Recognition Using Generalized Portrait Method\u2019,","volume":"24","author":"Vapnik","year":"1963","journal-title":"Automation and Remote Control"},{"year":"1999","author":"Wolters","key":"2024061809544876000_fqae013-B42"},{"key":"2024061809544876000_fqae013-B43","doi-asserted-by":"publisher","DOI":"10.1007\/11562382_14","author":"Zhao","year":"2005","journal-title":"Effective and Scalable Authorship Attribution Using Function Words"}],"container-title":["Digital Scholarship in the Humanities"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/dsh\/article-pdf\/39\/2\/765\/58267380\/fqae013.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/dsh\/article-pdf\/39\/2\/765\/58267380\/fqae013.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,18]],"date-time":"2024-06-18T10:56:05Z","timestamp":1718708165000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/dsh\/article\/39\/2\/765\/7634746"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,25]]},"references-count":43,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2024,3,25]]},"published-print":{"date-parts":[[2024,6,1]]}},"URL":"https:\/\/doi.org\/10.1093\/llc\/fqae013","relation":{},"ISSN":["2055-7671","2055-768X"],"issn-type":[{"type":"print","value":"2055-7671"},{"type":"electronic","value":"2055-768X"}],"subject":[],"published-other":{"date-parts":[[2024,6]]},"published":{"date-parts":[[2024,3,25]]}}}