{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T04:41:34Z","timestamp":1777696894865,"version":"3.51.4"},"reference-count":49,"publisher":"SAGE Publications","issue":"1","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IDA"],"published-print":{"date-parts":[[2016,1,18]]},"DOI":"10.3233\/ida-150793","type":"journal-article","created":{"date-parts":[[2016,1,19]],"date-time":"2016-01-19T12:17:35Z","timestamp":1453205855000},"page":"47-65","source":"Crossref","is-referenced-by-count":0,"title":["Prototype\/topic based clustering method for weblogs"],"prefix":"10.1177","volume":"20","author":[{"given":"Fernando","family":"Perez-Tellez","sequence":"first","affiliation":[{"name":"Social Media Research Group, Institute of Technology Tallaght, Dublin, Ireland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"John","family":"Cardiff","sequence":"additional","affiliation":[{"name":"Social Media Research Group, Institute of Technology Tallaght, Dublin, Ireland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Paolo","family":"Rosso","sequence":"additional","affiliation":[{"name":"NLE Lab. - PRHLT Research Center, Universitat Polit\u00e8cnica de Val\u00e8ncia, Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Pinto","sequence":"additional","affiliation":[{"name":"FCC, Benem\u00e9rita Universidad Aut\u00f3noma de Puebla, Puebla, Mexico"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"179","reference":[{"key":"10.3233\/IDA-150793_ref1","unstructured":"Agrawal N., Galan M., Liu H. and Subramanya S., Clustering blogs with collective wisdom, in: Proc of the International Conference on Web Engineering, IEEE Computer Society, USA, (2008), 336-339."},{"key":"10.3233\/IDA-150793_ref2","first-page":"77","volume-title":"A survey of text clustering algorithms","author":"Aggarwal","year":"2012"},{"key":"10.3233\/IDA-150793_ref3","first-page":"163","volume-title":"A survey of text classification algorithms","author":"Aggarwal","year":"2012"},{"key":"10.3233\/IDA-150793_ref4","unstructured":"Allan J., Carbonell J.G., Doddington G., Yamron J. and Yang Y., Topic detection and tracking pilot study: Final report, Proc DARPA Broadcast News Transcription and Understanding Workshop (1998)."},{"key":"10.3233\/IDA-150793_ref5","first-page":"37","article-title":"On-line new event detection and tracking","author":"Allan","year":"1998","journal-title":"Proc SIGIR International Conference on Research and Development in Information Retrieval"},{"key":"10.3233\/IDA-150793_ref6","unstructured":"Amigo E., Spina D., Beotas B. and Gonzalo J., Towards an evaluation framework for topic extraction systems for online reputation management, in: Proc of the Workshop on Dynamic Networks and Knowledge Discovery (DyNak), ECML\/PKDD, Pensa, Cordero, Rouveroil, Troyano and Rosso, eds, Vol. 655, CEUR-WS.org, Barcelona, Spain, 2010."},{"key":"10.3233\/IDA-150793_ref7","doi-asserted-by":"crossref","unstructured":"Banerjee S. and Pedersen T., An adapted Lesk algorithm for word sense disambiguation using WordNet, in: Proc of the CICLing 2002 Conference, Lecture Notes in Computer Science 3878 (2002), 136-145.","DOI":"10.1007\/3-540-45715-1_11"},{"key":"10.3233\/IDA-150793_ref8","doi-asserted-by":"crossref","unstructured":"Bhattacharya I. and Getoor L., A latent dirichlet model for unsupervised entity resolution, In the SIAM International Conference on Data Mining, (2006).","DOI":"10.1137\/1.9781611972764.5"},{"key":"10.3233\/IDA-150793_ref9","first-page":"993","article-title":"Latent dirichlet allocation","volume":"3","author":"Blei","year":"2003","journal-title":"The Journal of Marchine Learning Research"},{"key":"10.3233\/IDA-150793_ref10","unstructured":"Brain D. and Webb G.I., On the effect of data set size on bias and variance in classification learning, in: Proc of the Fourth Australian Knowledge Acquisition Workshop (AKAW '99), Sydney, Australia, The University of New South Wales, (1999), 117-128."},{"key":"10.3233\/IDA-150793_ref11","unstructured":"Boyd D., A Blogger's Blog: Exploring the definition of a medium, Reconstruction 6(4) (2006)."},{"key":"10.3233\/IDA-150793_ref12","unstructured":"Cai J.F., Lee W.S. and Teh Y.W., NUS-ML: Improving word sense disambiguation using topic features, in: Proc of the 4th International Workshop on Semantic Evaluations (SemEval), Association for Computational Linguistics, Morristown, NJ, USA, (2007), 249-252."},{"key":"10.3233\/IDA-150793_ref13","doi-asserted-by":"crossref","unstructured":"Fei-Fei L. and Perona P., A bayesian hierarchical model for learning natural scene categories, in: Proc of the 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, CVPR 2 (2005).","DOI":"10.1109\/CVPR.2005.16"},{"key":"10.3233\/IDA-150793_ref15","unstructured":"Flynn C. and Dunnion J., Topic detection in the news domain, in: Proc of the 2004 International Symposium on Information and Communication Technologies, ACM, (2004), 103-108."},{"key":"10.3233\/IDA-150793_ref16","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4615-2710-7","volume-title":"Explorations in Automatic Thesaurus Discovery","author":"Grefenstette","year":"1994"},{"issue":"1","key":"10.3233\/IDA-150793_ref17","doi-asserted-by":"crossref","first-page":"5228","DOI":"10.1073\/pnas.0307752101","article-title":"Finding scientific topics","volume":"101","author":"Griffiths","year":"2004","journal-title":"Proc of the National Academy of Sciences of the United States of America"},{"key":"10.3233\/IDA-150793_ref18","unstructured":"Griffiths T.L. and Steyvers M., A probabilistic approach to semantic representation, in: Proc of the 24th Annual Conference of the Congnitive Science Society (2002)."},{"issue":"23","key":"10.3233\/IDA-150793_ref19","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1080\/00437956.1954.11659520","article-title":"Distributional structure","volume":"10","author":"Harris","year":"1954","journal-title":"Word"},{"key":"10.3233\/IDA-150793_ref20","unstructured":"Hofman T., Probabilistic latent semantic indexing, in: Proc of the Twenty-Second Annual International SIGIR Conference, ACM, NY, USA, (1999), 50-57."},{"key":"10.3233\/IDA-150793_ref21","unstructured":"Hotho A., Staab S. and Stumme G., Ontologies improve text document clustering, in: Proc of the Third IEEE International Conference on Data Mining, Washington, DC, USA, IEEE Computer Society, (2003)."},{"issue":"2","key":"10.3233\/IDA-150793_ref22","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1147\/rd.312.0249","article-title":"Efficient randomized pattern-matching algorithms","volume":"31","author":"Karp","year":"1987","journal-title":"IBM Journal of Research and Development"},{"issue":"6","key":"10.3233\/IDA-150793_ref23","first-page":"34","article-title":"Legal documents clustering using latent dirichlet allocation","volume":"2","author":"Kumar","year":"2012","journal-title":"International Journal of Applied Information Systems (IJAIS)"},{"key":"10.3233\/IDA-150793_ref24","unstructured":"Lesk M., Automatic sense disambiguation using machine readable dictionaries: How to tell a pine cone from an ice cream cone, in: Proc of the 5th Annual International Conference on Systems Documentation, Toronto, Ontario, Canada, ACM, (1986), 24-26."},{"key":"10.3233\/IDA-150793_ref25","doi-asserted-by":"crossref","unstructured":"Li B., Xu S. and Zhang J., Enhancing clustering blog documents by utilizing author\/reader comments, ACM Southeast Regional Conference (2007), 94-99.","DOI":"10.1145\/1233341.1233359"},{"key":"10.3233\/IDA-150793_ref26","first-page":"281","volume-title":"Some methods for classification and analysis of multivariate observations","author":"MacQueen","year":"1967"},{"key":"10.3233\/IDA-150793_ref27","volume-title":"Foundations of statistical natural language processing","author":"Manning","year":"1999"},{"key":"10.3233\/IDA-150793_ref28","doi-asserted-by":"crossref","unstructured":"Peng J., Yang D., Wang J., Wu M. and Wang J., A clustering algorithm for short documents based on concept similarity, in: Proc of the IEEE Pacific Rim Conference on Communications, Computers and Signal Processing - PACRIM'07. IEEE, (2007), 42-45.","DOI":"10.1109\/PACRIM.2007.4313172"},{"key":"10.3233\/IDA-150793_ref29","unstructured":"Perez-Tellez F., Pinto D., Cardiff J. and Rosso P., Characterizing weblog corpora, in: Proc of the 14th International Conference on Applications of Natural Language to Information Systems, NLDB-2009, Lecture Notes in Computer Science, Springer-Verlag 5723 (2009), 299-300."},{"key":"10.3233\/IDA-150793_ref30","unstructured":"Perez-Tellez F., Pinto D., Cardiff J. and Rosso P., Clustering weblogs on the basis of a topic detection method, in: Proc of the 2nd Mexican Conference on Pattern Recognition: Advances in Pattern Recognition, Springer-Verlag, Puebla, Mexico, (2010), 342-351."},{"key":"10.3233\/IDA-150793_ref31","unstructured":"Pinto D., Jimenez-Salazar H. and Rosso P., Clustering abstracts of scientific texts using the transition point technique, in: Proc of the 7th International Conference, CICLing 2006, Springer, Berlin Heidelberg, (2006), 536-546."},{"key":"10.3233\/IDA-150793_ref32","doi-asserted-by":"crossref","unstructured":"Pinto D., Rosso P. and Jim\u00e9nez H., A self-enriching methodology for clustering narrow domain short texts, The Computer Journal, doi: 10.1093\/comjnl\/bxq069, 2010.","DOI":"10.1093\/comjnl\/bxq069"},{"key":"10.3233\/IDA-150793_ref33","unstructured":"Purandare A. and Pedersen T., Word sense discrimination by clustering contexts in vector and similarity spaces, HLT-NAACL 2004 Workshop: Eighth Conference on Computational Natural Language Learning (CoNLL-2004), Boston, Massachusetts, USA, Association for Computational Linguistics, (2004), 41-48."},{"key":"10.3233\/IDA-150793_ref34","doi-asserted-by":"crossref","unstructured":"Qiu Y. and Frei H.P., Concept based query expansion, in: Proc of the 16th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, ACM, (1993), 160-169.","DOI":"10.1145\/160688.160713"},{"issue":"1","key":"10.3233\/IDA-150793_ref35","first-page":"97","article-title":"Automatic word sense discrimination","volume":"24","author":"Sch\u00fctze","year":"1998","journal-title":"Computational Linguistics Journal"},{"issue":"11","key":"10.3233\/IDA-150793_ref36","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1145\/361219.361220","article-title":"A vector space model for automatic indexing","volume":"18","author":"Salton","year":"1975","journal-title":"Magazine Communications of the ACM"},{"key":"10.3233\/IDA-150793_ref37","doi-asserted-by":"crossref","unstructured":"Shubankar K., Singh A. and Pudi V., A frequent keyword-set based algorithm for topic modeling and clustering of research papers, Data Mining and Optimization (DMO), 2011 3rd Conference on, Putrajaya: IEEE, (2011), 96-102.","DOI":"10.1109\/DMO.2011.5976511"},{"key":"10.3233\/IDA-150793_ref38","doi-asserted-by":"crossref","unstructured":"Sekiguchi Y., Kawashima H., Okuda H. and Oku M., Topic detection from blog documents using users' interests, in: Proc of the 7th International Conference on Mobile Data Management (2006).","DOI":"10.1109\/MDM.2006.153"},{"key":"10.3233\/IDA-150793_ref39","unstructured":"Song Y., Wang H., Wang Z., Li H. and Chen W., Short text conceptualization using a probabilistic knowledgebase, in: Proc of the Twenty-Second International Joint Conference on Artificial Intelligence, Barcelona, Catalonia, Spain, AAAI Press, (2011), 2330-2336."},{"key":"10.3233\/IDA-150793_ref40","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1108\/eb026526","article-title":"A statistical interpretation of term specificity and its application in retrieval","volume":"28","author":"Sp\u00e1rck","year":"1972","journal-title":"Journal of Documentation"},{"key":"10.3233\/IDA-150793_ref41","unstructured":"Steinbach M., Karypis G. and Kumar V., A comparison of document clustering techniques, in: KDD Workshop on Text Mining (2000)."},{"key":"10.3233\/IDA-150793_ref42","doi-asserted-by":"crossref","unstructured":"Tsur O., Littman A. and Rappoport A., Efficient clustering of short messages into general domains, boston, The 7th International AAAI Conference on Weblogs and Social Media, AAAI, (2013).","DOI":"10.1609\/icwsm.v7i1.14420"},{"key":"10.3233\/IDA-150793_ref43","volume-title":"Information Retireval","author":"Van Rijsbergen","year":"1979"},{"issue":"1","key":"10.3233\/IDA-150793_ref44","doi-asserted-by":"crossref","first-page":"184","DOI":"10.4304\/jsw.8.1.184-191","article-title":"A method of hot topic detection in blogs using N-gram model","volume":"8","author":"Wang","year":"2013","journal-title":"Journal of Software"},{"key":"10.3233\/IDA-150793_ref45","unstructured":"Wartena C. and Brussee R., Topic detection by clustering keywords, in: Proc of the 19th International Conference on Database and Expert Systems Application, IEEE Computer Society, USA, (2008), 54-58."},{"issue":"2","key":"10.3233\/IDA-150793_ref46","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1007\/BF00393758","article-title":"Providing machine tractable dictionary tools","volume":"5","author":"Wilks","year":"1990","journal-title":"Machine Translation Journal"},{"issue":"13","key":"10.3233\/IDA-150793_ref47","doi-asserted-by":"crossref","first-page":"1727","DOI":"10.1016\/j.patrec.2007.04.015","article-title":"Employing latent dirichlet allocation for fraud detection in telecommunications","volume":"28","author":"Xing","year":"2007","journal-title":"Pattern Recognition Letters"},{"key":"10.3233\/IDA-150793_ref48","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1002\/meet.2011.14504801186","article-title":"Wikipedia-based topic clustering for microblogs","volume":"48","author":"Xu","year":"2011","journal-title":"Proc of the American Society for Information Science and Technology"},{"key":"10.3233\/IDA-150793_ref49","unstructured":"Zhai C., Velivelli A. and Yu B., A cross-collection mixture model for comparative text mining, in: Proc of the Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Seattle, WA, USA, ACM, (2004), 743-748."},{"key":"10.3233\/IDA-150793_ref50","volume-title":"Human behaviour and the principle of least effort","author":"Zipf","year":"1949"}],"container-title":["Intelligent Data Analysis"],"original-title":[],"link":[{"URL":"https:\/\/content.iospress.com\/download?id=10.3233\/IDA-150793","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T09:21:15Z","timestamp":1777454475000},"score":1,"resource":{"primary":{"URL":"https:\/\/journals.sagepub.com\/doi\/full\/10.3233\/IDA-150793"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,1,18]]},"references-count":49,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.3233\/ida-150793","relation":{},"ISSN":["1088-467X","1571-4128"],"issn-type":[{"value":"1088-467X","type":"print"},{"value":"1571-4128","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,1,18]]}}}