{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:57:43Z","timestamp":1774540663415,"version":"3.50.1"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,1,25]],"date-time":"2024-01-25T00:00:00Z","timestamp":1706140800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,25]],"date-time":"2024-01-25T00:00:00Z","timestamp":1706140800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Lang Resources &amp; Evaluation"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s10579-023-09707-7","type":"journal-article","created":{"date-parts":[[2024,1,25]],"date-time":"2024-01-25T10:29:08Z","timestamp":1706178548000},"page":"191-218","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Large scale annotated dataset for code-mix abusive short noisy text"],"prefix":"10.1007","volume":"59","author":[{"given":"Paras","family":"Tiwari","sequence":"first","affiliation":[]},{"given":"Sawan","family":"Rai","sequence":"additional","affiliation":[]},{"given":"C. Ravindranath","family":"Chowdary","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,1,25]]},"reference":[{"issue":"1","key":"9707_CR1","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1146\/annurev-psych-010419-050800","volume":"72","author":"S Atran","year":"2021","unstructured":"Atran, S. (2021). Psychology of transnational terrorism and extreme political conflict. ARP, 72(1), 471\u2013501. https:\/\/doi.org\/10.1146\/annurev-psych-010419-050800","journal-title":"ARP"},{"key":"9707_CR2","unstructured":"Banerjee, S., Moghe, N., Arora, S., & Khapra, M. M. (2018). A dataset for building code-mixed goal oriented conversation systems. In Proceedings of COLING (pp. 3766\u20133780). Retrieved from https:\/\/aclanthology.org\/C18-1319\/"},{"key":"9707_CR3","doi-asserted-by":"publisher","unstructured":"Bohra, A., Vijay, D., Singh, V., Akhtar, S. S., Shrivastava, M. (2018). A dataset of hindi-english code-mixed social media text for hate speech detection. In Proceedings of PEOPLES@NAACL-HTL (pp. 36\u201341) (2018). ACL. https:\/\/doi.org\/10.18653\/v1\/w18-1105","DOI":"10.18653\/v1\/w18-1105"},{"issue":"4","key":"9707_CR4","doi-asserted-by":"publisher","first-page":"494","DOI":"10.1016\/j.im.2017.11.004","volume":"55","author":"S Camacho","year":"2018","unstructured":"Camacho, S., Hassanein, K., & Head, M. (2018). Cyberbullying impacts on victims\u2019 satisfaction with information and communication technologies: The role of perceived cyberbullying severity. I &M, 55(4), 494\u2013507. https:\/\/doi.org\/10.1016\/j.im.2017.11.004","journal-title":"I &M"},{"key":"9707_CR5","unstructured":"Chakravarthi, B. R., Jose, N., Suryawanshi, S., Sherly, E., & McCrae, J. P. (2020). A sentiment analysis dataset for code-mixed Malayalam-English. In Proceedings of SLTU\/CCURL@LREC. ELRA (pp. 177\u2013184). Retrieved from https:\/\/aclanthology.org\/2020.sltu-1.25\/"},{"issue":"3","key":"9707_CR6","doi-asserted-by":"publisher","first-page":"765","DOI":"10.1007\/s10579-022-09583-7","volume":"56","author":"BR Chakravarthi","year":"2022","unstructured":"Chakravarthi, B. R., Priyadharshini, R., Muralidaran, V., Jose, N., Suryawanshi, S., Sherly, E., & McCrae, J. P. (2022). Dravidiancodemix: Sentiment analysis and offensive language identification dataset for dravidian languages in code-mixed text. LRE, 56(3), 765\u2013806. https:\/\/doi.org\/10.1007\/s10579-022-09583-7","journal-title":"LRE"},{"key":"9707_CR7","doi-asserted-by":"publisher","unstructured":"ElSherief, M., Nilizadeh, S., Nguyen, D., Vigna, G., & Belding, E. (2018). Peer to peer hate: Hate speech instigators and their targets. In Proceedings of ICWSM@AAAI (Vol. 12, pp. 52\u201361). https:\/\/doi.org\/10.1609\/icwsm.v12i1.15038","DOI":"10.1609\/icwsm.v12i1.15038"},{"key":"9707_CR8","doi-asserted-by":"publisher","first-page":"1019","DOI":"10.1007\/s10579-020-09486-5","volume":"54","author":"T Fornaciari","year":"2020","unstructured":"Fornaciari, T., Cagnina, L., Rosso, P., & Poesio, M. (2020). Fake opinion detection: How similar are crowdsourced datasets to real data? LRE, 54, 1019\u20131058. https:\/\/doi.org\/10.1007\/s10579-020-09486-5","journal-title":"LRE"},{"key":"9707_CR9","doi-asserted-by":"publisher","unstructured":"Founta, A. M., Djouvas, C., Chatzakou, D., Leontiadis, I., Blackburn, J., Stringhini, G., Vakali, A., Sirivianos, M., & Kourtellis, N. (2018). Large scale crowdsourcing and characterization of twitter abusive behavior. In Twelfth international CWSM@AAAI. https:\/\/doi.org\/10.1609\/icwsm.v12i1.14991","DOI":"10.1609\/icwsm.v12i1.14991"},{"key":"9707_CR10","unstructured":"Gella, S., Bali, K., & Choudhury, M. (2014). \u201cye word kis lang ka hai bhai?\u201d Testing the limits of word level language identification. In Proceedings of ICNLP (Vol. 11, pp. 368\u2013377). Retrieved from https:\/\/aclanthology.org\/W14-5151.pdf"},{"key":"9707_CR11","doi-asserted-by":"publisher","unstructured":"Gong, H., Valido, A., Ingram, K. M., Fanti, G., Bhat, S., & Espelage, D. L. (2021). Abusive language detection in heterogeneous contexts: Dataset collection and the role of supervised attention. In Proceedings of the AAAI (Vol. 35, pp. 14804\u201314812). https:\/\/doi.org\/10.1609\/aaai.v35i17.17738","DOI":"10.1609\/aaai.v35i17.17738"},{"key":"9707_CR12","unstructured":"Gupta, V. K. (2019). \"hinglish\" language\u2013modeling a messy code-mixed language. arXiv preprint arXiv:1912.13109"},{"key":"9707_CR13","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1016\/j.asoc.2020.106198","volume":"91","author":"D Jain","year":"2020","unstructured":"Jain, D., Kumar, A., & Garg, G. (2020). Sarcasm detection in mash-up language using soft-attention based bi-directional LSTM and feature-rich CNN. ASC, 91, 106\u2013198. https:\/\/doi.org\/10.1016\/j.asoc.2020.106198","journal-title":"ASC"},{"key":"9707_CR14","unstructured":"Khanuja, S., Dandapat, S., Sitaram, S., & Choudhury, M. (2020) A new dataset for natural language inference from code-mixed conversations. In Proceedings of CodeSwitch@LREC. ELRA (pp. 9\u201316). Retrieved from https:\/\/aclanthology.org\/2020.calcs-1.2\/"},{"key":"9707_CR15","doi-asserted-by":"publisher","unstructured":"Kim, S., Weber, I., Wei, L., & Oh, A. (2014). Sociolinguistic analysis of twitter in multilingual societies. In Proceedings of ACM-HT (pp. 243\u2013248). https:\/\/doi.org\/10.1145\/2631775.2631824","DOI":"10.1145\/2631775.2631824"},{"key":"9707_CR16","first-page":"185","volume":"20","author":"WF Klostermeyer","year":"1996","unstructured":"Klostermeyer, W. F., & Muslea, M. (1996). Techniques for algorithm design and analysis: \u010case study of a greedy algorithm. Informatica, 20, 185\u2013190.","journal-title":"Informatica"},{"key":"9707_CR17","doi-asserted-by":"publisher","unstructured":"Lui, M., & Baldwin, T. (2014). Accurate language identification of twitter messages. In Proceedings of LASM (pp. 17\u201325). https:\/\/doi.org\/10.3115\/v1\/w14-1303","DOI":"10.3115\/v1\/w14-1303"},{"key":"9707_CR18","doi-asserted-by":"publisher","unstructured":"Maity, K., & Saha, S. (2021). BERT-capsule model for cyberbullying detection in code-mixed Indian languages. In NLPIS (pp. 147\u2013155). https:\/\/doi.org\/10.1007\/978-3-030-80599-9_13","DOI":"10.1007\/978-3-030-80599-9_13"},{"key":"9707_CR19","doi-asserted-by":"publisher","unstructured":"Mathur, P., Sawhney, R., Ayyar, M., & Shah, R. (2018). Did you offend me? classification of offensive tweets in Hinglish language. In Proceedings of ALW2 (pp. 138\u2013148). https:\/\/doi.org\/10.18653\/v1\/w18-5118","DOI":"10.18653\/v1\/w18-5118"},{"key":"9707_CR20","doi-asserted-by":"publisher","unstructured":"Pal, R., & Sharma, D. (2019). A dataset for semantic role labelling of Hindi-English code-mixed tweets. In Proc. of LAW@ACL. ACL (pp. 178\u2013188). https:\/\/doi.org\/10.18653\/v1\/w19-4020","DOI":"10.18653\/v1\/w19-4020"},{"key":"9707_CR21","doi-asserted-by":"publisher","unstructured":"Petersen, A. M., Tenenbaum, J. N., Havlin, S., Stanley, H. E., & Perc, M. (2012). Languages cool as they expand: Allometric scaling and the decreasing need for new words. SR, 2(1), 943. https:\/\/doi.org\/10.1038\/srep00943","DOI":"10.1038\/srep00943"},{"key":"9707_CR22","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1007\/s10579-020-09502-8","volume":"55","author":"F Poletto","year":"2021","unstructured":"Poletto, F., Basile, V., Sanguinetti, M., Bosco, C., & Patti, V. (2021). Resources and benchmark corpora for hate speech detection: A systematic review. LRE, 55, 477\u2013523. https:\/\/doi.org\/10.1007\/s10579-020-09502-8","journal-title":"LRE"},{"key":"9707_CR23","unstructured":"Roark, B., Wolf-Sonkin, L., Kirov, C., Mielke, S. J., Johny, C., Demir\u015fahin, I., & Hall, K. (2020). Processing South Asian languages written in the Latin script: The Dakshina dataset. In Proceedings of (LREC) (pp. 2413\u20132423). Retrieved from https:\/\/arxiv.org\/abs\/2007.01176"},{"key":"9707_CR24","doi-asserted-by":"publisher","unstructured":"Rudra, K., Rijhwani, S., Begum, R., Bali, K., Choudhury, M., & Ganguly, N. (2016). Understanding language preference for expression of opinion and sentiment: What do Hindi-English speakers do on twitter? In Proceedings of EMNLP (pp. 1131\u20131141). https:\/\/doi.org\/10.18653\/v1\/d16-1121","DOI":"10.18653\/v1\/d16-1121"},{"key":"9707_CR25","unstructured":"Santy, S., Srinivasan, A., & Choudhury, M. (2021). Bertologicomix: How does code-mixing interact with multilingual bert? In Proceedings of DANLP (Vol. 2, pp. 111\u2013121). Retrieved from https:\/\/aclanthology.org\/2021.adaptnlp-1.12"},{"key":"9707_CR26","unstructured":"Saroj, A., & Pal, S. (2020). An Indian language social media collection for hate and offensive speech. In Proceedings of of ResT-UP@LREC (pp. 2\u20138). https:\/\/aclanthology.org\/2020.restup-1.2"},{"key":"9707_CR27","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/w14-3914","author":"J Sharma","year":"2014","unstructured":"Sharma, J., Bali, K., Choudhury, M., & Vyas, Y. (2014). \u201ci am borrowing ya mixing?\" an analysis of english-hindi code mixing in facebook. EMNLP. https:\/\/doi.org\/10.3115\/v1\/w14-3914","journal-title":"EMNLP"},{"issue":"06","key":"9707_CR28","doi-asserted-by":"publisher","first-page":"2050086","DOI":"10.1142\/S0217984920500864","volume":"34","author":"S Shekhar","year":"2020","unstructured":"Shekhar, S., Sharma, D. K., & Beg, M. S. (2020). Language identification framework in code-mixed social media text based on quantum LSTM-the word belongs to which language? MPL B, 34(06), 2050086. https:\/\/doi.org\/10.1142\/S0217984920500864","journal-title":"MPL B"},{"key":"9707_CR29","doi-asserted-by":"publisher","unstructured":"Singh, V., Vijay, D., Akhtar, S. S., & Shrivastava, M. (2018). Named entity recognition for Hindi-English code-mixed social media text. In Proceedings of NEW@ACL. https:\/\/doi.org\/10.18653\/v1\/w18-2405","DOI":"10.18653\/v1\/w18-2405"},{"key":"9707_CR30","unstructured":"Sj\u00f6bergh, J., & Araki, K. (2008). A multi-lingual dictionary of dirty words. In LREC. Citeseer. Retrieved from http:\/\/www.lrec-conf.org\/proceedings\/lrec2008\/pdf\/133_paper.pdf"},{"issue":"2","key":"9707_CR31","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1109\/MSECP.2003.1193207","volume":"1","author":"L Spitzner","year":"2003","unstructured":"Spitzner, L. (2003). The honeynet project: Trapping the hackers. IEEE S &P, 1(2), 15\u201323. https:\/\/doi.org\/10.1109\/MSECP.2003.1193207","journal-title":"IEEE S &P"},{"key":"9707_CR32","doi-asserted-by":"publisher","unstructured":"Srivastava, V., & Singh, M. (2022). Code-mixed nlg: Resources, metrics, and challenges. In 9th ACM IKDD CODS and 27th COMAD (pp. 328\u2013332). https:\/\/doi.org\/10.1145\/3493700.3493766","DOI":"10.1145\/3493700.3493766"},{"issue":"6","key":"9707_CR33","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1089\/cyber.2020.0253","volume":"24","author":"F Stevens","year":"2021","unstructured":"Stevens, F., Nurse, J. R., & Arief, B. (2021). Cyber stalking, cyber harassment, and adult mental health: A systematic review. CBSN, 24(6), 367\u2013376. https:\/\/doi.org\/10.1089\/cyber.2020.0253","journal-title":"CBSN"},{"key":"9707_CR34","doi-asserted-by":"publisher","unstructured":"Szanda\u0142a, T. (2020). Review and comparison of commonly used activation functions for deep neural networks. In Bio-inspired neurocomputing (pp. 203\u2013224). https:\/\/doi.org\/10.1007\/978-981-15-5495-7_11","DOI":"10.1007\/978-981-15-5495-7_11"},{"issue":"1","key":"9707_CR35","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1186\/s40537-022-00594-3","volume":"9","author":"S Thara","year":"2022","unstructured":"Thara, S., & Poornachandran, P. (2022). Social media text analytics of Malayalam-English code-mixed using deep learning. Journal of Big Data, 9(1), 45. https:\/\/doi.org\/10.1186\/s40537-022-00594-3","journal-title":"Journal of Big Data"},{"key":"9707_CR36","doi-asserted-by":"publisher","unstructured":"Tiwari, P., & Rai, S. (2021). Mind your tweet: Abusive tweet detection. In SPECOM (pp. 704\u2013715). https:\/\/doi.org\/10.1007\/978-3-030-87802-3_63","DOI":"10.1007\/978-3-030-87802-3_63"},{"issue":"3","key":"9707_CR37","doi-asserted-by":"publisher","first-page":"324","DOI":"10.1177\/1940161218781254","volume":"23","author":"N Usher","year":"2018","unstructured":"Usher, N., Holcomb, J., & Littman, J. (2018). Twitter makes it worse: Political journalists, gendered echo chambers, and the amplification of gender bias. IJPP, 23(3), 324\u2013344. https:\/\/doi.org\/10.1177\/1940161218781254","journal-title":"IJPP"},{"key":"9707_CR38","unstructured":"van Rosendaal, J., Caselli, T., & Nissim, M. (2020). Lower bias, higher density abusive language datasets: A recipe. In Proceedings of ResT-UP (pp. 14\u201319). Retrieved from https:\/\/www.aclweb.org\/anthology\/2020.restup-1.4.pdf"},{"key":"9707_CR39","first-page":"38","volume":"2111","author":"D Vijay","year":"2018","unstructured":"Vijay, D., Bohra, A., Singh, V., Akhtar, S. S., & Shrivastava, M. (2018). A dataset for detecting irony in Hindi-English code-mixed social media text. EMSASW@ESWC, 2111, 38\u201346.","journal-title":"EMSASW@ESWC"},{"issue":"3","key":"9707_CR40","doi-asserted-by":"publisher","first-page":"532","DOI":"10.3846\/jbem.2022.16178","volume":"23","author":"J Vveinhardt","year":"2022","unstructured":"Vveinhardt, J., & Sroka, W. (2022). What determines employee procrastination and multitasking in the workplace: Personal qualities or mismanagement? JBEM, 23(3), 532\u2013550. https:\/\/doi.org\/10.3846\/jbem.2022.16178","journal-title":"JBEM"},{"key":"9707_CR41","doi-asserted-by":"publisher","unstructured":"Wiegand, M., Ruppenhofer, J., & Kleinbauer, T. (2019). Detection of abusive language: the problem of biased datasets. In Proceedings of NAACL-HLT (Vol. 1, pp. 602\u2013608). ACL. https:\/\/doi.org\/10.18653\/v1\/n19-1060","DOI":"10.18653\/v1\/n19-1060"},{"key":"9707_CR42","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1016\/j.chb.2019.08.006","volume":"102","author":"S Xu","year":"2020","unstructured":"Xu, S., & Zhou, A. (2020). Hashtag homophily in twitter network: Examining a controversial cause-related marketing campaign. CHB, 102, 87\u201396. https:\/\/doi.org\/10.1016\/j.chb.2019.08.006","journal-title":"CHB"}],"container-title":["Language Resources and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-023-09707-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10579-023-09707-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-023-09707-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,17]],"date-time":"2025-03-17T08:26:17Z","timestamp":1742199977000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10579-023-09707-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,25]]},"references-count":42,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["9707"],"URL":"https:\/\/doi.org\/10.1007\/s10579-023-09707-7","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-2826989\/v1","asserted-by":"object"}]},"ISSN":["1574-020X","1574-0218"],"issn-type":[{"value":"1574-020X","type":"print"},{"value":"1574-0218","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1,25]]},"assertion":[{"value":"17 November 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 January 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interest"}}]}}