{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T12:02:32Z","timestamp":1648814552229},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2012,6,4]],"date-time":"2012-06-04T00:00:00Z","timestamp":1338768000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Lang Resources &amp; Evaluation"],"published-print":{"date-parts":[[2013,3]]},"DOI":"10.1007\/s10579-012-9192-1","type":"journal-article","created":{"date-parts":[[2012,6,4]],"date-time":"2012-06-04T11:02:47Z","timestamp":1338807767000},"page":"127-149","source":"Crossref","is-referenced-by-count":2,"title":["A document is known by the company it keeps: neighborhood consensus for short text categorization"],"prefix":"10.1007","volume":"47","author":[{"given":"Gabriela","family":"Ram\u00edrez-de-la-Rosa","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Manuel","family":"Montes-y-G\u00f3mez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thamar","family":"Solorio","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Luis","family":"Villase\u00f1or-Pineda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2012,6,4]]},"reference":[{"key":"9192_CR1","unstructured":"Abney, S. P. (2008). Semi-supervised learning for computational linguistics. Computer science and data analysis series. London: Chapman and Hall\/CRC."},{"key":"9192_CR2","unstructured":"Angelova, R., & Weikum, G. (2006). Graph-based text classification: Learn from your neighbors. In Proceedings of the 29th annual international ACM SIGIR conference on research and development in information retrieval, SIGIR \u201906 (pp. 485\u2013492). New York, NY: ACM."},{"key":"9192_CR3","unstructured":"Anguiano-Hern\u00e1ndez, E., Villase\u00f1or-Pineda, L., Montes-y-G\u00f3mez, M., & Rosso, P. (2010). Summarization as feature selection for document categorization on small datasets. In Proceedings of the 7th international conference on advances in natural language processing, IceTAL\u201910 (pp. 39\u201344). Berlin, Heidelberg: Springer."},{"key":"9192_CR4","unstructured":"Banerjee, S., Ramanathan, K., & Gupta, A. (2007). Clustering short texts using wikipedia. In SIGIR \u201907 Proceedings of the 30th annual international ACM SIGIR conference on Research and development in information retrieval (pp. 787\u2013788). New York, NY: ACM."},{"key":"9192_CR5","unstructured":"Cardoso-Cachopo, A., & Oliveira, A. L. (2007). Semi-supervised single-label text categorization using centroid-based classifiers. In SAC \u201907: Proceedings of the 2007 ACM symposium on applied computing (pp. 844\u2013851). New york: ACM."},{"key":"9192_CR6","first-page":"273","volume":"20","author":"C. Cortes","year":"1995","unstructured":"Cortes, C., & Vapnik, V. (1995). Support-vector networks. Machine Learning, 20, 273\u2013297.","journal-title":"Machine Learning"},{"key":"9192_CR7","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1007\/11731139_10","volume":"3918","author":"K. Driessens","year":"2006","unstructured":"Driessens, K., Reutemann, P., Pfahringer, B., & Leschi, C. (2006). Using weighted nearest neighbor to benefit from unlabeled data. Lecture Notes in Computer Science, 3918, 60\u201369.","journal-title":"Lecture Notes in Computer Science"},{"key":"9192_CR8","unstructured":"Escobar-Acevedo, A., Montes-y-G\u00f3mez, M., & Villase\u00f1or-Pineda, L. (2009). Using nearest neighbor information to improve cross-language text classification. In Proceedings of the 8th Mexican international conference on artificial intelligence, MICAI \u201909 (pp. 157\u2013164). Berlin, Heidelberg: Springer."},{"key":"9192_CR9","unstructured":"Faguo, Z., Fan, Z., Bingru, Y., & Xingang, Y. (2010). Research on short text classification algorithm based on statistics and rules. In Proceedings of the 2010 third international symposium on electronic commerce and security, ISECS \u201910 (pp. 3\u20137). Washington, DC: IEEE Computer Society."},{"key":"9192_CR10","doi-asserted-by":"crossref","unstructured":"Fan, X., & Hu, H. (2010). A new model for chinese short-text classification considering feature extension. Artificial Intelligence and Computational Intelligence, International Conference on 2, 7\u201311.","DOI":"10.1109\/AICI.2010.125"},{"key":"9192_CR11","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511546914","volume-title":"The text mining handbook: Advanced approaches in analyzing unstructured data","author":"R. Feldman","year":"2006","unstructured":"Feldman, R., & Sanger, J. (2006). The text mining handbook: Advanced approaches in analyzing unstructured data. Cambridge, MA: Cambridge University Press."},{"key":"9192_CR12","unstructured":"Go, A., Bhayani, R., & Huang, L. (2009). Twitter sentiment classification using distant supervision (pp. 1\u20136)."},{"key":"9192_CR13","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1007\/s10791-008-9083-7","volume":"12","author":"R. Guzm\u00e1n-Cabrera","year":"2009","unstructured":"Guzm\u00e1n-Cabrera, R., Montes-y-G\u00f3mez, M., Rosso, P., & Villase\u00f1or-Pineda, L. (2009). Using the web as corpus for self-training text categorization. Information Retrieval, 12, 400\u2013415.","journal-title":"Information Retrieval"},{"key":"9192_CR14","unstructured":"Han, E. H., & Karypis, G. (2000). Centroid-based document classification: Analysis and experimental results. In Proceedings of the 4th European conference on principles of data mining and knowledge discovery, PKDD \u201900 (pp. 424\u2013431). London: Springer."},{"key":"9192_CR15","unstructured":"Healy, M., Delany, S. J., & Zamolotskikh, A. (2005). An assessment of case-based reasoning for short text message classification. In N. Creaney (Ed.), 16th Irish conference on artificial intelligence and cognitive science."},{"key":"9192_CR16","unstructured":"Hu, X., Zhang, X., Lu, C., Park, E. K., & Zhou, X. (2009). Exploiting wikipedia as external knowledge for document clustering. In Proceedings of the 15th ACM SIGKDD international conference on knowledge discovery and data mining, KDD \u201909 (pp. 389\u2013396). New York, NY: ACM."},{"key":"9192_CR17","unstructured":"Huang, Y., Sun, L., & Nie, J. (2009). Smoothing document language model with local word graph. In Proceeding of the 18th ACM conference on Information and knowledge management, CIKM \u201909 (pp. 1943\u20131946). New York, NY: ACM."},{"key":"9192_CR18","unstructured":"Ifrim, G., & Weikum, G. (2006). Transductive learning for text classification using explicit knowledge models. In J. F\u00fcrnkranz, T. Scheffer, & M. Spiliopoulou (Eds.), Proceedings of the 10th European conference on principles and practice of knowledge discovery in databases, PKDD 2006 (pp. 223\u2013234). Berlin, Heidelberg, Germany: Springer."},{"key":"9192_CR19","doi-asserted-by":"crossref","unstructured":"Jiang, E. P. (2010). Learning to integrate unlabeled data in text classification. In W. D. Yi Hang & P. S. Sandhu (Eds.), Proccedings of the 3rd IEEE international conference on computer science and information technology (Vol. 4, pp. 82\u201386). Chengdu, China.","DOI":"10.1109\/ICCSIT.2010.5564473"},{"key":"9192_CR20","doi-asserted-by":"crossref","first-page":"1173","DOI":"10.1016\/j.ipm.2006.11.006","volume":"43","author":"I. S. Kang","year":"2007","unstructured":"Kang, I. S., Na, S. H., Kim, J., & Lee, J. H. (2007). Cluster-based patent retrieval. Information Processing and Management, 43, 1173\u20131182.","journal-title":"Information Processing and Management"},{"issue":"1","key":"9192_CR21","doi-asserted-by":"crossref","first-page":"70","DOI":"10.1016\/j.ipm.2008.07.004","volume":"45","author":"Y. Ko","year":"2009","unstructured":"Ko, Y., & Seo, J. (2009). Text classification from unlabeled documents with bootstrapping and feature projection techniques. Information Processing and Management, 45(1), 70\u201383.","journal-title":"Information Processing and Management"},{"key":"9192_CR22","unstructured":"Kurland, O., & Lee, L. (2004). Corpus structure, language models, and ad hoc information retrieval. In Proceedings of the 27th annual international ACM SIGIR conference on research and development in information retrieval, SIGIR \u201904 (pp. 194\u2013201). New York, NY: ACM."},{"key":"9192_CR23","unstructured":"Kyriakopoulou, A., & Kalamboukis, T. (2006). Text classification using clustering. In Proceedings of the ECML-PKDD discovery challenge workshop."},{"key":"9192_CR24","unstructured":"Lewis, D. (1998). Naive (bayes) at forty: The independence assumption in information retrieval. In C. N\u00e9dellec & C. Rouveirol (Eds.) Machine learning: ECML-98, lecture notes in computer science (Vol. 1398, pp. 4\u201315). Berlin\/Heidelberg: Springer."},{"key":"9192_CR25","unstructured":"Lewis, D. D. (1991). Evaluating text categorization. In Proceedings of speech and natural language workshop (pp. 312\u2013318). Los Altos, CA: Morgan Kaufmann."},{"key":"9192_CR26","unstructured":"Liu, X., & Croft, W. B. (2004). Cluster-based retrieval using language models. In Proceedings of the 27th annual international conference on research and development in information retrieval, SIGIR \u201904 (pp. 186\u2013193). New York, NY: ACM."},{"key":"9192_CR27","unstructured":"Makagonov, P., Alex, M., & Gelbukh, E. (2004). Clustering abstracts instead of full texts. In Text, speech, dialog, LNAI N 3206 (pp. 129\u2013135). Berlin: Springer."},{"key":"9192_CR28","unstructured":"Mei, Q., Zhang, D., & Zhai, C. (2008). A general optimization framework for smoothing language models on graph structures. In Proceedings of the 31st annual international ACM SIGIR conference on research and development in information retrieval, SIGIR \u201908 (pp. 611\u2013618). New York, NY: ACM."},{"key":"9192_CR29","unstructured":"Navigli, R., & Crisafulli, G. (2010). Inducing word senses to improve web search result clustering. In Proceedings of the 2010 conference on empirical methods in natural language processing, EMNLP \u201910 (pp. 116\u2013126). Stroudsburg, PA: Association for Computational Linguistics."},{"key":"9192_CR30","unstructured":"Ning, X., & Karypis, G. (2008). The set classification problem and solution methods. In Proceedings of the 2008 IEEE international conference on data mining workshops (pp. 720\u2013729). Washington, DC: IEEE Computer Society."},{"key":"9192_CR31","unstructured":"Oh, H. J., Myaeng, S. H., & Lee, M. H. (2000). A practical hypertext catergorization method using links and incrementally available class information. In Proceedings of the 23rd annual international ACM SIGIR conference on research and development in information retrieval, SIGIR \u201900 (pp. 264\u2013271). New York, NY: ACM."},{"key":"9192_CR32","unstructured":"Ostrowski, D. A. (2010). Sentiment mining within social media for topic identification. In Proceedings of the 2010 IEEE fourth international conference on semantic computing, ICSC \u201910 (pp. 394\u2013401). Washington, DC: IEEE Computer Society."},{"key":"9192_CR33","unstructured":"Perez-Tellez, F., Pinto, D., Cardiff, J., & Rosso, P. (2010). On the difficulty of clustering company tweets. In Proceedings of the 2nd international workshop on search and mining user-generated contents, SMUC \u201910 (pp. 95\u2013102). New York, NY: ACM."},{"key":"9192_CR34","unstructured":"Pinto, D. (2008). On clustering and evaluation of narrow domain short-text corpora. Ph.D. thesis, Polytechnic University of Valencia, Spain."},{"key":"9192_CR35","doi-asserted-by":"crossref","first-page":"1148","DOI":"10.1093\/comjnl\/bxq069","volume":"54","author":"D. Pinto","year":"2010","unstructured":"Pinto, D., Rosso, P., & Jim\u00e9nez-Salazar, H. (2010). A self-enriching methodology for clustering narrow domain short texts. The Computer Journal, 54, 1148\u20131165.","journal-title":"The Computer Journal"},{"key":"9192_CR36","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1613\/jair.279","volume":"4","author":"J. R. Quinlan","year":"1996","unstructured":"Quinlan, J. R. (1996). Improved use of continuous attributes in c4.5. Artificial Intelligence Research, 4, 77\u201390.","journal-title":"Artificial Intelligence Research"},{"key":"9192_CR37","unstructured":"Rigutini, L., Maggini, M., & Liu, B. (2005). An EM based training algorithm for cross-language text categorization. In Proceedings of the 2005 IEEE\/WIC\/ACM international conference on web intelligence, WI \u201905 (pp. 529\u2013535). Washington, DC: IEEE Computer Society."},{"key":"9192_CR38","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/505282.505283","volume":"34","author":"F. Sebastiani","year":"2002","unstructured":"Sebastiani, F. (2002). Machine learning in automated text categorization. ACM Computing Surveys, 34, 1\u201347.","journal-title":"ACM Computing Surveys"},{"key":"9192_CR39","unstructured":"Sen, P., & Getoor, L. (2007). Link-based classification. Technical Report CS-TR-4858, University of Maryland."},{"key":"9192_CR40","unstructured":"Sharifi, B., Hutton, M. A., & Kalita, J. (2010). Summarizing microblogs automatically. In The 2010 annual conference of the North American chapter of the association for computational linguistics, HLT \u201910 (pp. 685\u2013688). Stroudsburg, PA: Association for Computational Linguistics."},{"key":"9192_CR41","unstructured":"Sriram, B., Fuhry, D., Demir, E., Ferhatosmanoglu, H., & Demirbas, M. (2010). Short text classification in twitter to improve information filtering. In Proceeding of the 33rd international ACM SIGIR conference on research and development in information retrieval, SIGIR \u201910 (pp. 841\u2013842). New York, NY: ACM."},{"issue":"4","key":"9192_CR42","doi-asserted-by":"crossref","first-page":"667","DOI":"10.1016\/j.eswa.2004.12.023","volume":"28","author":"S. Tan","year":"2005","unstructured":"Tan, S. (2005). Neighbor-weighted k-nearest neighbor for unbalanced text corpus. Expert Systems with Applications, 28(4), 667\u2013671.","journal-title":"Expert Systems with Applications"},{"issue":"1\u20132","key":"9192_CR43","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1016\/j.eswa.2007.06.028","volume":"35","author":"S. Tan","year":"2008","unstructured":"Tan, S. (2008). An improved centroid classifier for text categorization. Expert Systems with Applications, 35(1\u20132), 279\u2013285.","journal-title":"Expert Systems with Applications"},{"key":"9192_CR44","unstructured":"Tao, T., Wang, X., Mei, Q., & Zhai, C. (2006). Language model information retrieval with document expansion. In Proceedings of the main conference on human language technology conference of the North American chapter of the association of computational linguistics, HLT-NAACL \u201906 (pp. 407\u2013414). Stroudsburg, PA: Association for Computational Linguistics."},{"key":"9192_CR45","unstructured":"Tao, Y., & Xi-wei, W. (2010). Feature extension for short text. In Z. J. Youfeng Zou Fei Yu (Ed.) Proceedings of the third international symposium on computer science and computational technology, ISCSCT \u201910 (pp. 338\u2013341). China: Jiaozuo."},{"key":"9192_CR46","unstructured":"Udupa, R., Bhole, A., & Bhattacharyya, P. (2009). \u201dA term is known by the company it keeps\": On selecting a good expansion set in pseudo-relevance feedback. In Proceedings of the 2nd international conference on theory of information retrieval: advances in information retrieval theory, ICTIR \u201909 (pp. 104\u2013115). Berlin, Heidelberg: Springer."},{"key":"9192_CR47","unstructured":"Wang, J., Zhou, Y., Li, L., Hu, B., & Hu, X. (2009). Improving short text clustering performance with keyword expansion. In H. Wang, Y. Shen, T. Huang, & Z. Zeng (Eds.) The sixth international symposium on neural networks (ISNN 2009), advances in intelligent and soft computing (Vol. 56, pp. 291\u2013298). Berlin\/Heidelberg: Springer."},{"key":"9192_CR48","unstructured":"Wermter, S., Panchev, C., & Arevian, G. (1999). Hybrid neural plausibility networks for news agents. In Proceedings of the sixteenth national conference on artificial intelligence and the eleventh innovative applications of artificial intelligence conference innovative applications of artificial intelligence, AAAI \u201999\/IAAI \u201999 (pp. 93\u201398). Menlo Park, CA: American Association for Artificial Intelligence."},{"key":"9192_CR49","unstructured":"Witten, I., & Frank, E. (2005). Data mining: Practical machine learning tools and techniques (2nd ed.). Morgan Kaufmann Series in Data Management Systems. San Fransisco, CA: Morgan Kaufmann."},{"key":"9192_CR50","unstructured":"Xu, Z., Jin, R., Huang, K., Lyu, M. R., & King, I. (2008). Semi-supervised text categorization by active search. In Proceeding of the 17th ACM conference on information and knowledge management, CIKM \u201908 (pp. 1517\u20131518). New York, NY: ACM."},{"key":"9192_CR51","unstructured":"Zelikovitz, S. (2004). Transductive LSI for short text classification problems. In FLAIRS conference."},{"key":"9192_CR52","unstructured":"Zelikovitz, S., & Hirsh, H. (2000). Improving short text classification using unlabeled background knowledge to assess document similarity. In Proceedings of the seventeenth international conference on machine learning, ICML\u201900 (pp. 1183\u20131190)."}],"container-title":["Language Resources and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-012-9192-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10579-012-9192-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-012-9192-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,29]],"date-time":"2019-06-29T06:11:01Z","timestamp":1561788661000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10579-012-9192-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,6,4]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2013,3]]}},"alternative-id":["9192"],"URL":"https:\/\/doi.org\/10.1007\/s10579-012-9192-1","relation":{},"ISSN":["1574-020X","1574-0218"],"issn-type":[{"value":"1574-020X","type":"print"},{"value":"1574-0218","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,6,4]]}}}