{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T06:23:34Z","timestamp":1762928614542,"version":"3.37.3"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,9,14]],"date-time":"2018-09-14T00:00:00Z","timestamp":1536883200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Prog Artif Intell"],"published-print":{"date-parts":[[2019,4]]},"DOI":"10.1007\/s13748-018-0165-5","type":"journal-article","created":{"date-parts":[[2018,9,14]],"date-time":"2018-09-14T01:51:35Z","timestamp":1536889895000},"page":"123-132","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Fuzzy clustering-based semi-supervised approach for outlier detection in big text data"],"prefix":"10.1007","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6958-736X","authenticated-orcid":false,"given":"Farek","family":"Lazhar","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,14]]},"reference":[{"key":"165_CR1","volume-title":"Data Mining: Concepts and Techniques","author":"J Han","year":"2006","unstructured":"Han, J., Kamber, M.: Data Mining: Concepts and Techniques, vol. 743. Morgan Kaufmann, San Francisco (2006)"},{"key":"165_CR2","unstructured":"Tamboli, J., Shukla, M.: A survey of outlier detection algorithms for data streams. In: 3rd International Conference on Computing for Sustainable Global Development (INDIACom), pp 3535\u20133540 (2016)"},{"issue":"6","key":"165_CR3","first-page":"8153","volume":"5","author":"SS Sreevidya","year":"2014","unstructured":"Sreevidya, S.S.: A survey on outlier detection methods. Int. J. Comput. Sci. Inf. Technol. 5(6), 8153\u20138156 (2014)","journal-title":"Int. J. Comput. Sci. Inf. Technol."},{"key":"165_CR4","volume-title":"Big Data Analytics. Advances in Intelligent Systems and Computing","author":"S Sharma","year":"2018","unstructured":"Sharma, S., Jain, R.: Outlier detection in agriculture domain: application and techniques. In: Aggarwal, V., Bhatnagar, V., Mishra, D. (eds.) Big Data Analytics. Advances in Intelligent Systems and Computing, vol. 654. Springer, Singapor (2018)"},{"key":"165_CR5","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1007\/978-3-662-48577-4_3","volume-title":"Clustering High-Dimensional Data. Lecture Notes in Computer Science","author":"I Assent","year":"2015","unstructured":"Assent, I.: Efficient density-based subspace clustering in high dimensions. In: Masulli, F., Petrosino, A., Rovetta, S. (eds.) Clustering High-Dimensional Data. Lecture Notes in Computer Science, vol. 7627, pp. 34\u201349. Springer, Berlin (2015)"},{"issue":"2","key":"165_CR6","first-page":"61","volume":"4","author":"R Merrell","year":"2015","unstructured":"Merrell, R., Diaz, D.: Comparison of data mining methods on different applications: clustering and classification methods. Inf Sci Lett Lect Notes Comput Sci 4(2), 61\u201366 (2015)","journal-title":"Inf Sci Lett Lect Notes Comput Sci"},{"key":"165_CR7","doi-asserted-by":"crossref","unstructured":"Blum, A., Mitchell, T.: Combining labeled and unlabeled data with co-training. In: Proceeding COLT\u2019 98 Proceedings of the Eleventh Annual Conference on Computational Learning Theory, pp. 92\u2013100 (1998)","DOI":"10.1145\/279943.279962"},{"key":"165_CR8","volume-title":"Database Theory ICDT 2001. Lecture Notes in Computer Science","author":"CC Aggarwal","year":"2001","unstructured":"Aggarwal, C.C., Hinneburg, A., Keim, D.A.: On the surprising behavior of distance metrics in high dimensional space. In: Van den Bussche, J., Vianu, V. (eds.) Database Theory ICDT 2001. Lecture Notes in Computer Science, vol. 1973. Springer, Berlin (2001)"},{"key":"165_CR9","volume-title":"Robust Regression and Outlier Detection","author":"P Rousseeuw","year":"1996","unstructured":"Rousseeuw, P., Leroy, A.: Robust Regression and Outlier Detection, 3rd edn. Wiley, New York (1996)","edition":"3"},{"key":"165_CR10","doi-asserted-by":"crossref","unstructured":"Ramaswamy, S., Rastogi, R., Shim, K.: Efficient algorithms for mining outliers from large data sets. In: SIGMOD Conference, pp. 427\u2013438 (2000)","DOI":"10.1145\/342009.335437"},{"issue":"2","key":"165_CR11","first-page":"1306","volume":"2","author":"VS Jagadeeswaran","year":"2013","unstructured":"Jagadeeswaran, V.S., Uma, P.: Detection of noise by efficient hierarchical BIRCH algorithm for large data sets. Int. J. Adv. Res. Comput. Commun. Eng. 2(2), 1306\u20131309 (2013)","journal-title":"Int. J. Adv. Res. Comput. Commun. Eng."},{"issue":"2","key":"165_CR12","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1109\/TKDE.2005.31","volume":"17","author":"F Angiulli","year":"2005","unstructured":"Angiulli, F., Pizzuti, C.: Outlier mining in large high-dimensional data sets. IEEE Trans. Knowl. Data Eng. 17(2), 203\u2013215 (2005)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"3","key":"165_CR13","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1145\/331499.331504","volume":"31","author":"AK Jain","year":"1999","unstructured":"Jain, A.K., Murty, M.N., Flyn, P.J.: Data clustering: a review. ACM Comput Surv 31(3), 264\u2013323 (1999)","journal-title":"ACM Comput Surv"},{"key":"165_CR14","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-0450-1","volume-title":"Pattern Recognition with Fuzzy Objective Function Algorithms","author":"JC Bezdek","year":"1981","unstructured":"Bezdek, J.C.: Pattern Recognition with Fuzzy Objective Function Algorithms. Plenum Press, New York (1981)"},{"issue":"7","key":"165_CR15","first-page":"16","volume":"1","author":"V Kumar","year":"2013","unstructured":"Kumar, V., Kumar, S., Singh, A.K.: Outlier detection: a clustering-based approach. Int. J. Sci. Mod. Eng. 1(7), 16\u201319 (2013)","journal-title":"Int. J. Sci. Mod. Eng."},{"key":"165_CR16","unstructured":"Le, Q., Mikolov, T.: Distributed representations of sentences and documents. In: ICML\u201914 Proceedings of the 31st International Conference on International Conference on Machine Learning, Beijing, China, vol. 32, pp. II-1188\u2013II-1196 (2014)"},{"issue":"7","key":"165_CR17","first-page":"2067","volume":"4","author":"G Singh","year":"2013","unstructured":"Singh, G., Kumar, V.: An efficient clustering and distance based approach for outlier detection. Int. J. Comput. Trends Technol. 4(7), 2067\u20132072 (2013)","journal-title":"Int. J. Comput. Trends Technol."},{"key":"165_CR18","doi-asserted-by":"crossref","unstructured":"Guha, S., Rastogi, R., Shim, K.: CURE: an efficient clustering algorithm for large databases. In: ACM SIGMOD Conference, vol. 27(2) (1998)","DOI":"10.1145\/276304.276312"},{"issue":"8","key":"165_CR19","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1109\/2.781637","volume":"32","author":"G Karypis","year":"1999","unstructured":"Karypis, G., Han, E.H., Kumar, V.: Chameleon: hierarchical clustering using dynamic modeling. IEEE Comput. 32(8), 68\u201375 (1999)","journal-title":"IEEE Comput."},{"key":"165_CR20","doi-asserted-by":"crossref","unstructured":"Breunig, M.M., Kriegel, H.P. ,Ng, R.T., Lof, S.J.: Identifying density-based local outliers. In: SIGMOD Conference, pp. 93\u2013104 (2000)","DOI":"10.1145\/335191.335388"},{"key":"165_CR21","unstructured":"Knorr, E.M., Ng, R.T.: Algorithms for mining distance-based outliers in large datasets. In: Proceeding VLDB Algorithms for Mining Distance-Based Outliers in Large Datasets, pp. 392\u2013403 (1998)"},{"key":"165_CR22","doi-asserted-by":"crossref","unstructured":"\u00c7elik, M., Dada\u015fer-\u00c7elik, F., Dokuz, A.\u015e.: Anomaly detection in temperature data using DBSCAN algorithm. In: International Symposium on Innovations in Intelligent Systems and Applications (INISTA), Istanbul, Turkey, pp. 91\u201395 (2011)","DOI":"10.1109\/INISTA.2011.5946052"},{"key":"165_CR23","doi-asserted-by":"publisher","DOI":"10.1201\/9781420034912","volume-title":"Clustering for Data Mining: A Data Recovery Approach","author":"BG Mirkin","year":"2005","unstructured":"Mirkin, B.G.: Clustering for Data Mining: A Data Recovery Approach, vol. 3. CRC Press, Boca Raton (2005)"},{"key":"165_CR24","unstructured":"Wang, W., Yang, J., Muntz, R.: STING: a statistical information grid approach to spatial data mining. In: Proceedings of the 23rd International Conference on Very Large Data Bases, pp. 186\u2013195. Morgan Kaufmann Publishers Inc., Burlington (1997)"},{"key":"165_CR25","first-page":"332","volume-title":"PAKDD 2007 Workshops. Lecture Notes in Artificial Intelligence (LNAI)","author":"K Niu","year":"2007","unstructured":"Niu, K., Huang, C., Zhang, S., Chen, J.: ODDC: outlier detection using distance distribution clustering. In: Washio, T. (ed.) PAKDD 2007 Workshops. Lecture Notes in Artificial Intelligence (LNAI), vol. 4819, pp. 332\u2013343. Springer, Berlin (2007)"},{"key":"165_CR26","doi-asserted-by":"crossref","unstructured":"Breunig, M.M., Kriegel, H., Ng, R.T., et al.: LOF: identifying density-based local outliers. In: Proceedings of ACM SIGMOD International Conference on Management of Data, Dalles, TX, pp. 93\u2013104 (2000)","DOI":"10.1145\/342009.335388"},{"key":"165_CR27","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1016\/0167-8655(89)90040-8","volume":"9","author":"I Gath","year":"1989","unstructured":"Gath, I., Geva, A.: Fuzzy clustering for the estimation of the parameters of the components of mixtures of normal distribution. Pattern Recognit. Lett. 9, 77\u201386 (1989)","journal-title":"Pattern Recognit. Lett."},{"key":"165_CR28","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1016\/0167-9473(93)90218-I","volume":"15","author":"B Cutsem","year":"1993","unstructured":"Cutsem, B., Gath, I.: Detection of outliers and robust estimation using fuzzy clustering. Comput. Stat. Data Anal. 15, 47\u201361 (1993)","journal-title":"Comput. Stat. Data Anal."},{"key":"165_CR29","doi-asserted-by":"crossref","unstructured":"Klawonn, K., H\u00f6ppner, F., Shim, K., Jayaram, B.: Efficient algorithms for mining outliers from large data sets. In: Proceeding Revised Selected Papers of the First International Workshop on Clustering High-Dimensional Data, vol. 7627, pp. 14\u201333 (2013)","DOI":"10.1007\/978-3-662-48577-4_2"},{"key":"165_CR30","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. In: Proceedings of Workshop at the International Conference on Learning Representations, Scottsdale, USA (2013)"},{"key":"165_CR31","volume-title":"Text, Speech, and Dialogue. TSD 2015. Lecture Notes in Computer Science","author":"M Campr","year":"2015","unstructured":"Campr, M., Je\u017eek, K.: Comparing semantic models for evaluating automatic document summarization. In: Kr\u00e1l, P., Matou\u0161ek, V. (eds.) Text, Speech, and Dialogue. TSD 2015. Lecture Notes in Computer Science, vol. 9302. Springer, Cham (2015)"},{"key":"165_CR32","doi-asserted-by":"crossref","unstructured":"Lau, J.H., Baldwin, T.: An empirical evaluation of doc2vec with practical insights into document embedding generation. In: Proceedings of the 1st Workshop on Representation Learning for NLP, Berlin, Germany, pp. 78\u201386 (2015)","DOI":"10.18653\/v1\/W16-1609"},{"key":"165_CR33","volume-title":"Clustering and Information Retrieval. Network Theory and Applications","author":"L Ert\u00f6z","year":"2004","unstructured":"Ert\u00f6z, L., Steinbach, M., Kumar, V.: Finding topics in collections of documents: a shared nearest neighbor approach. In: Ert\u00f6z, L., Steinbach, M., Kumar, V. (eds.) Clustering and Information Retrieval. Network Theory and Applications, vol. 11. Springer, Boston (2004)"},{"key":"165_CR34","doi-asserted-by":"crossref","unstructured":"Bayley, M.J., Gillet, V.J., Willett, P., Bradshaw, J., Green, D.V.S.: Computational analysis of molecular diversity for drug discovery. In: Proceeding of the 3rd Annual Conference on Research in Computational Molecular Biology, pp 321\u2013330. ACM Press, New York (1999)","DOI":"10.1145\/299432.299510"},{"key":"165_CR35","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1016\/S0019-9958(65)90241-X","volume":"8","author":"LA Zadeh","year":"1965","unstructured":"Zadeh, L.A.: Fuzzy sets. Inf. Control 8, 338\u2013353 (1965)","journal-title":"Inf. Control"},{"key":"165_CR36","unstructured":"Sami, \u00c4., Tommi K.: Introduction to partitioning-based clustering methods with a robust example. Reports of the Department of Mathematical Information Technology, University of Jyv\u00e4skyl\u00e4, Finland (2006)"},{"issue":"2","key":"165_CR37","first-page":"2501","volume":"5","author":"DJ Bora","year":"2014","unstructured":"Bora, D.J.: Computational analysis of molecular diversity for drug discovery. Int. J. Comput. Sci. Inf. Technol. 5(2), 2501\u20132506 (2014)","journal-title":"Int. J. Comput. Sci. Inf. Technol."},{"issue":"2","key":"165_CR38","first-page":"2501","volume":"5","author":"DJ Bora","year":"2014","unstructured":"Bora, D.J., Gupta, A.K.: Effect of different distance measures on the performance of K-means algorithm: an experimental study in Matlab. Int. J. Comput. Sci. Inf. Technol. 5(2), 2501\u20132506 (2014)","journal-title":"Int. J. Comput. Sci. Inf. Technol."},{"key":"165_CR39","volume-title":"Machine Learning and Knowledge Discovery in Databases. ECML PKDD 2014. Lecture Notes in Computer Science","author":"M Kull","year":"2014","unstructured":"Kull, M., Flach, P.A.: Reliability maps: a tool to enhance probability estimates and improve classification accuracy. In: Calders, T., Esposito, F., H\u00fcllermeier, E., Meo, R. (eds.) Machine Learning and Knowledge Discovery in Databases. ECML PKDD 2014. Lecture Notes in Computer Science, vol. 8725. Springer, Berlin (2014)"},{"issue":"2","key":"165_CR40","doi-asserted-by":"publisher","first-page":"534","DOI":"10.1007\/s10618-014-0356-z","volume":"29","author":"F Wang","year":"2015","unstructured":"Wang, F., Sun, J.: Survey on distance metric learning and dimensionality reduction in data mining. Data Min. Knowl. Discov. 29(2), 534\u2013564 (2015)","journal-title":"Data Min. Knowl. Discov."},{"key":"165_CR41","unstructured":"Wu, W.: Clustering and information retrieval. In: Feature Selection for High-Dimensional Data. Artificial Intelligence: Foundations, Theory, and Algorithms. Springer, Cham (2015)"},{"key":"165_CR42","volume-title":"Clustering and Information Retrieval. Network Theory and Applications","author":"JR Wen","year":"2004","unstructured":"Wen, J.R., Zhang, H.J.: Query clustering in the web context. In: Wu, W., Xiong, H., Shekhar, S. (eds.) Clustering and Information Retrieval. Network Theory and Applications, vol. 11. Springer, Boston (2004)"},{"key":"165_CR43","volume-title":"Pattern Recognition. MCPR 2012. Lecture Notes in Computer Science","author":"AP L\u00f3pez-Monroy","year":"2012","unstructured":"L\u00f3pez-Monroy, A.P., Montes-y-G\u00f3mez, M., Villase\u00f1or-Pineda, L., Carrasco-Ochoa, J.A., Mart\u00ednez-Trinidad, J.F.: A new document author representation for authorship attribution. In: Carrasco-Ochoa, J.A., Mart\u00ednez-Trinidad, J.F., Olvera L\u00f3pez, J.A., Boyer, K.L. (eds.) Pattern Recognition. MCPR 2012. Lecture Notes in Computer Science, vol. 7329. Springer, Berlin (2012)"},{"key":"165_CR44","doi-asserted-by":"crossref","unstructured":"Forsyth, D.: Learning to classify. In: Probability and Statistics for Computer Science. Springer, Cham (2018)","DOI":"10.1007\/978-3-319-64410-3_11"}],"container-title":["Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13748-018-0165-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13748-018-0165-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13748-018-0165-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,24]],"date-time":"2019-10-24T01:49:22Z","timestamp":1571881762000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13748-018-0165-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9,14]]},"references-count":44,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,4]]}},"alternative-id":["165"],"URL":"https:\/\/doi.org\/10.1007\/s13748-018-0165-5","relation":{},"ISSN":["2192-6352","2192-6360"],"issn-type":[{"type":"print","value":"2192-6352"},{"type":"electronic","value":"2192-6360"}],"subject":[],"published":{"date-parts":[[2018,9,14]]},"assertion":[{"value":"13 April 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 September 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 September 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}