{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T14:30:21Z","timestamp":1774967421865,"version":"3.50.1"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319754864","type":"print"},{"value":"9783319754871","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-75487-1_32","type":"book-chapter","created":{"date-parts":[[2018,3,20]],"date-time":"2018-03-20T08:53:10Z","timestamp":1521535990000},"page":"406-417","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Collecting and Annotating Indian Social Media Code-Mixed Corpora"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6244-8626","authenticated-orcid":false,"given":"Anupam","family":"Jamatia","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5252-707X","authenticated-orcid":false,"given":"Bj\u00f6rn","family":"Gamb\u00e4ck","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3818-8227","authenticated-orcid":false,"given":"Amitava","family":"Das","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,3,21]]},"reference":[{"key":"32_CR1","first-page":"145","volume-title":"Standard Languages and Language Standards in a Changing Europe","author":"J Androutsopoulos","year":"2011","unstructured":"Androutsopoulos, J.: Language change and digital media: a review of conceptions and evidence. In: Kristiansen, T., Coupland, N. (eds.) Standard Languages and Language Standards in a Changing Europe, pp. 145\u2013159. Novus, Oslo (2011)"},{"key":"32_CR2","unstructured":"Baldwin, T., Cook, P., Lui, M., MacKinlay, A., Wang, L.: How noisy social media text, how diffrnt social media sources? In: Proceedings of the 6th International Joint Conference on Natural Language Processing, pp. 356\u2013364. AFNLP, Nagoya, Japan, October 2013"},{"key":"32_CR3","doi-asserted-by":"crossref","unstructured":"Bali, K., Sharma, J., Choudhury, M., Vyas, Y.: \u201cI am borrowing $$ya$$ mixing?\u201d: An analysis of English-Hindi code mixing in Facebook. In: Proceedings of the 1st Workshop on Computational Approaches to Code Switching, pp. 116\u2013126. ACL, Doha, Qatar, October 2014","DOI":"10.3115\/v1\/W14-3914"},{"key":"32_CR4","doi-asserted-by":"crossref","unstructured":"Barman, U., Wagner, J., Chrupa\u0142a, G., Foster, J.: DCU-UVT: word-level language classification with code-mixed data. In: Proceedings of the 1st Workshop on Computational Approaches to Code Switching, pp. 127\u2013132. ACL, Doha, Qatar, October 2014","DOI":"10.3115\/v1\/W14-3915"},{"key":"32_CR5","unstructured":"Baskaran, S., Bali, K., Bhattacharya, T., Bhattacharyya, P., Choudhury, M., Jha, G.N., Rajendran, S., Saravanan, K., Sobha, L., Subbarao, K.: A common parts-of-speech tagset framework for Indian languages. In: Proceedings of the 6th International Conference on Language Resources and Evaluation, pp. 1331\u20131337. ELRA, Marrakech, Marocco, May 2008"},{"issue":"3","key":"32_CR6","first-page":"67","volume":"5","author":"MS C\u00e1rdenas-Claros","year":"2009","unstructured":"C\u00e1rdenas-Claros, M.S., Isharyanti, N.: Code switching and code mixing in internet chatting: between \u201cyes\u201d, \u201cya\u201d, and \u201csi\u201d a case study. J. Comput.-Mediat. Commun. 5(3), 67\u201378 (2009)","journal-title":"J. Comput.-Mediat. Commun."},{"issue":"3","key":"32_CR7","first-page":"41","volume":"54","author":"A Das","year":"2013","unstructured":"Das, A., Gamb\u00e4ck, B.: Code-mixing in social media text: the last language identification frontier? Traitement Automatique des Langues 54(3), 41\u201364 (2013)","journal-title":"Traitement Automatique des Langues"},{"key":"32_CR8","unstructured":"Das, A., Gamb\u00e4ck, B.: Identifying languages at the word level in code-mixed Indian social media text. In: Proceedings of the 11th International Conference on Natural Language Processing, pp. 169\u2013178, Goa, India, December 2014"},{"issue":"6","key":"32_CR9","doi-asserted-by":"crossref","first-page":"584","DOI":"10.1002\/asi.20147","volume":"58","author":"F Debole","year":"2005","unstructured":"Debole, F., Sebastiani, F.: An analysis of the relative hardness of Reuters-21578 subsets. J. Am. Soc. Inf. Sci. Technol. 58(6), 584\u2013596 (2005)","journal-title":"J. Am. Soc. Inf. Sci. Technol."},{"issue":"7","key":"32_CR10","first-page":"7417","volume":"3","author":"PS Dholakia","year":"2014","unstructured":"Dholakia, P.S., Yoonus, M.M.: Rule based approach for the transition of tagsets to build the POS annotated corpus. Int. J. Adv. Res. Comput. Commun. Eng. 3(7), 7417\u20137422 (2014)","journal-title":"Int. J. Adv. Res. Comput. Commun. Eng."},{"key":"32_CR11","unstructured":"Diab, M., Kamboj, A.: Feasibility of leveraging crowd sourcing for the creation of a large scale annotated resource for Hindi English code switched data: a pilot annotation. In: Proceedings of the 9th Workshop on Asian Language Resources, pp. 36\u201340. AFNLP, Chiang Mai, Thailand, November 2011"},{"issue":"3","key":"32_CR12","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1037\/h0057532","volume":"32","author":"R Flesch","year":"1948","unstructured":"Flesch, R.: A new readability yardstick. J. Appl. Psychol. 32(3), 221\u2013233 (1948)","journal-title":"J. Appl. Psychol."},{"issue":"1","key":"32_CR13","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1177\/13670069020060010101","volume":"6","author":"J Gafaranga","year":"2002","unstructured":"Gafaranga, J., Torras, M.C.: Interactional otherness: towards a redefinition of codeswitching. Int. J. Biling. 6(1), 1\u201322 (2002)","journal-title":"Int. J. Biling."},{"key":"32_CR14","unstructured":"Gamb\u00e4ck, B., Das, A.: On measuring the complexity of code-mixing. In: Proceedings of the 1st Workshop on Language Technologies for Indian Social Media, Goa, India, pp. 1\u20137, December 2014"},{"key":"32_CR15","unstructured":"Gamb\u00e4ck, B., Das, A.: Comparing the level of code-switching in corpora. In: Proceedings of the 10th International Conference on Language Resources and Evaluation. ELRA, Portoro\u017e, Slovenia, May 2016 (to appear)"},{"key":"32_CR16","doi-asserted-by":"crossref","unstructured":"Gimpel, K., Schneider, N., O\u2019Connor, B., Das, D., Mills, D., Eisenstein, J., Heilman, M., Yogatama, D., Flanigan, J., Smith, N.A.: Part-of-speech tagging for Twitter: annotation, features, and experiments. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics, vol. 2, pp. 42\u201347. ACL, Portland, Oregon, June 2011","DOI":"10.21236\/ADA547371"},{"key":"32_CR17","doi-asserted-by":"crossref","unstructured":"Gupta, P., Bali, K., Banchs, R.E., Choudhury, M., Rosso, P.: Query expansion for mixed-script information retrieval. In: Proceedings of the 37th International Conference on Research and Development in Information Retrieval, ACM SIGIR, Gold Coast, Queensland, Australia, pp. 677\u2013686, July 2014","DOI":"10.1145\/2600428.2609622"},{"key":"32_CR18","doi-asserted-by":"crossref","unstructured":"Hu, Y., Talamadupula, K., Kambhampati, S.: Dude, srsly?: The surprisingly formal nature of Twitter\u2019s language. In: Proceedings of the 7th International Conference on Weblogs and Social Media. AAAI, Boston, Massachusetts, July 2013","DOI":"10.1609\/icwsm.v7i1.14443"},{"key":"32_CR19","doi-asserted-by":"crossref","unstructured":"Joshi, A.K.: Processing of sentences with intra-sentential code-switching. In: Proceedings of the 9th International Conference on Computational Linguistics. ACL, Prague, Czechoslovakia, pp. 145\u2013150, July 1982","DOI":"10.3115\/991813.991836"},{"issue":"1","key":"32_CR20","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1075\/ijcl.6.1.05kil","volume":"6","author":"A Kilgarriff","year":"2001","unstructured":"Kilgarriff, A.: Comparing corpora. Int. J. Corpus Linguist. 6(1), 97\u2013133 (2001)","journal-title":"Int. J. Corpus Linguist."},{"key":"32_CR21","doi-asserted-by":"crossref","unstructured":"Nguyen, D., Do\u011fru\u00f6z, A.S.: Word level language identification in online multilingual communication. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 857\u2013862. ACL, Seattle, Washington, October 2013","DOI":"10.18653\/v1\/D13-1084"},{"issue":"3","key":"32_CR22","first-page":"n3","volume":"6","author":"JC Paolillo","year":"1996","unstructured":"Paolillo, J.C.: Language choice on soc.culture.punjab. Electron. J. Commun.\/La Revue Electronique de Communication 6(3), n3 (1996)","journal-title":"Electron. J. Commun.\/La Revue Electronique de Communication"},{"issue":"4","key":"32_CR23","first-page":"JCMC446","volume":"4","author":"J Paolillo","year":"1999","unstructured":"Paolillo, J.: The virtual speech community: social network and language variation on IRC. J. Comput.-Mediat. Commun. 4(4), JCMC446 (1999)","journal-title":"J. Comput.-Mediat. Commun."},{"key":"32_CR24","unstructured":"Petrov, S., Das, D., McDonald, R.T.: A universal part-of-speech tagset. CoRR abs\/1104.2086 (2011). http:\/\/arxiv.org\/abs\/1104.2086"},{"issue":"7","key":"32_CR25","doi-asserted-by":"crossref","first-page":"1148","DOI":"10.1093\/comjnl\/bxq069","volume":"54","author":"D Pinto","year":"2011","unstructured":"Pinto, D., Rosso, P., Jim\u00e9nez-Salazar, H.: A self-enriching methodology for clustering narrow domain short texts. Comput. J. 54(7), 1148\u20131165 (2011)","journal-title":"Comput. J."},{"key":"32_CR26","unstructured":"Rudrapal, D., Jamatia, A., Chakma, K., Das, A., Gamb\u00e4ck, B.: Sentence boundary detection for social media text. In: Proceedings of the 12th International Conference on Natural Language Processing, Trivandrum, India, pp. 91\u201397, December 2015"},{"key":"32_CR27","unstructured":"Sequiera, R., Choudhury, M., Gupta, P., Rosso, P., Kumar, S., Banerjee, S., Naskar, S.K., Bandyopadhyay, S., Chittaranjan, G., Das, A., Chakma, K.: Overview of FIRE-2015 shared task on mixed script information retrieval. In: Proceedings of the 7th Forum for Information Retrieval Evaluation, Gandhinagar, India, pp. 21\u201327, December 2015"},{"key":"32_CR28","doi-asserted-by":"crossref","unstructured":"Solorio, T., Blair, E., Maharjan, S., Bethard, S., Diab, M., Gohneim, M., Hawwari, A., AlGhamdi, F., Hirschberg, J., Chang, A., Fung, P.: Overview for the first shared task on language identification in code-switched data. In: Proceedings of the 1st Workshop on Computational Approaches to Code Switching, pp. 62\u201372. ACL, Doha, Qatar, October 2014","DOI":"10.3115\/v1\/W14-3907"},{"key":"32_CR29","doi-asserted-by":"crossref","unstructured":"Vyas, Y., Gella, S., Sharma, J., Bali, K., Choudhury, M.: POS tagging of English-Hindi code-mixed social media content. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing, pp. 974\u2013979. ACL, Doha, Qatar, October 2014","DOI":"10.3115\/v1\/D14-1105"}],"container-title":["Lecture Notes in Computer Science","Computational Linguistics and Intelligent Text Processing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-75487-1_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T21:55:47Z","timestamp":1751493347000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-75487-1_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319754864","9783319754871"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-75487-1_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]}}}