{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T22:21:58Z","timestamp":1776723718799,"version":"3.51.2"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2014,5,6]],"date-time":"2014-05-06T00:00:00Z","timestamp":1399334400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Scientometrics"],"published-print":{"date-parts":[[2014,9]]},"DOI":"10.1007\/s11192-014-1321-8","type":"journal-article","created":{"date-parts":[[2014,5,5]],"date-time":"2014-05-05T20:21:08Z","timestamp":1399321268000},"page":"767-786","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":204,"title":["Clustering scientific documents with topic modeling"],"prefix":"10.1007","volume":"100","author":[{"given":"Chyi-Kwei","family":"Yau","sequence":"first","affiliation":[]},{"given":"Alan","family":"Porter","sequence":"additional","affiliation":[]},{"given":"Nils","family":"Newman","sequence":"additional","affiliation":[]},{"given":"Arho","family":"Suominen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,5,6]]},"reference":[{"key":"1321_CR1","unstructured":"Apache Software Foundation, Drost, I., Dunning, T., Eastman, J., Gospodnetic, O., Ingersoll, G., Mannix, J., Owen, S., & Wettin, K. (2010). Apache mahout. http:\/\/mloss.org\/software\/view\/144\/ ."},{"key":"1321_CR2","first-page":"147","volume-title":"Advances in neural information processing systems 18","author":"D Blei","year":"2006","unstructured":"Blei, D., & Lafferty, J. (2006a). Correlated topic models. In Y. Weiss, B. Sch\u00f6lkopf, & J. Platt (Eds.), Advances in neural information processing systems 18 (pp. 147\u2013154). Cambridge: MIT Press."},{"key":"1321_CR3","volume-title":"Advances in neural information processing systems 16","author":"D Blei","year":"2004","unstructured":"Blei, D., Griffiths, T. L., Jordan, M. I., Tenenbaum, J. B., et al. (2004). Hierarchical topic models and the nested chinese restaurant process. In S. Thrun, L. Saul, & B. Sch\u00f6lkopf (Eds.), Advances in neural information processing systems 16. Cambridge: MIT Press."},{"key":"1321_CR4","doi-asserted-by":"crossref","unstructured":"Blei, D.M., & Lafferty, J.D. (2006b). Dynamic topic models. In Proceedings of the 23rd International Conference on Machine Learning (p. 113120).","DOI":"10.1145\/1143844.1143859"},{"issue":"1","key":"1321_CR5","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1214\/07-AOAS114","volume":"1","author":"DM Blei","year":"2007","unstructured":"Blei, D. M., & Lafferty, J. D. (2007). A correlated topic model of science. The Annals of Applied Statistics, 1(1), 17\u201335.","journal-title":"The Annals of Applied Statistics"},{"key":"1321_CR6","first-page":"71","volume-title":"Text mining: Classification, clustering, and applications","author":"DM Blei","year":"2009","unstructured":"Blei, D. M., & Lafferty, J. D. (2009). Text mining: Classification, clustering, and applications (10th ed., pp. 71\u201394). London: Taylor and Francis. chap Topic Models.","edition":"10"},{"key":"1321_CR7","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei, D. M., Ng, A. Y., Jordon, M. I., et al. (2003). Latent dirichlet allocation. The Journal of Machine Learning Research, 3, 993\u20131022.","journal-title":"The Journal of Machine Learning Research"},{"key":"1321_CR8","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1002\/aris.1440360102","volume":"36","author":"C Borgman","year":"2002","unstructured":"Borgman, C., & Furner, J. (2002). Scholarly communication and bibliometrics. Annual Review of Information Science and Technology, 36, 3\u201372.","journal-title":"Annual Review of Information Science and Technology"},{"issue":"8","key":"1321_CR9","doi-asserted-by":"crossref","first-page":"981","DOI":"10.1016\/j.techfore.2006.04.004","volume":"73","author":"T Daim","year":"2006","unstructured":"Daim, T., Rueda, G., Martin, H., & Gerdsri, P. (2006). Forecasting emerging technologies: Use of bibliometrics and patent analysis. Technological Forecasting & Social Change, 73(8), 981\u20131012.","journal-title":"Technological Forecasting & Social Change"},{"key":"1321_CR10","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511546914","volume-title":"The text mining handbook: Advanced approaches in analyzing unstructured data","author":"R Feldman","year":"2006","unstructured":"Feldman, R., & Sanger, J. (2006). The text mining handbook: Advanced approaches in analyzing unstructured data. Cambridge: Cambridge University Press."},{"key":"1321_CR11","doi-asserted-by":"crossref","unstructured":"Ferrara, A., & Salini, S. (2012). Ten challenges in modeling bibliographic data for bibliometric analysis. Scientometrics 121.","DOI":"10.1007\/s11192-012-0810-x"},{"issue":"1","key":"1321_CR12","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1007\/s11192-005-0208-0","volume":"63","author":"P Glenisson","year":"2005","unstructured":"Glenisson, P., Glnzel, W., & Persson, O. (2005). Combining full-text analysis and bibliometric indicators. A pilot study. Scientometrics, 63(1), 163\u2013180.","journal-title":"Scientometrics"},{"key":"1321_CR13","doi-asserted-by":"crossref","first-page":"5228","DOI":"10.1073\/pnas.0307752101","volume":"101","author":"TL Griffiths","year":"2004","unstructured":"Griffiths, T. L., & Steyvers, M. (2004). Finding scientific topics. Proceedings of the National Academy of Sciences of the United States of America, 101, 5228\u20135235.","journal-title":"Proceedings of the National Academy of Sciences of the United States of America"},{"issue":"13","key":"1321_CR14","doi-asserted-by":"crossref","first-page":"130","DOI":"10.18637\/jss.v040.i13","volume":"40","author":"B Gr\u00fcn","year":"2011","unstructured":"Gr\u00fcn, B., & Hornik, K. (2011). Topicmodels: An R package for fitting topic models. Journal of Statistical Software, 40(13), 130.","journal-title":"Journal of Statistical Software"},{"key":"1321_CR15","doi-asserted-by":"crossref","unstructured":"Hofmann, T. (1999). Probabilistic latent semantic indexing. In Proceedings of the 22nd annual international ACM SIGIR conference on Research and development in information retrieval (pp. 50\u201357).","DOI":"10.1145\/312624.312649"},{"key":"1321_CR16","unstructured":"McCallum, A. (2002). Mallet: A machine learning for language toolkit. http:\/\/mallet.cs.umass.edu ."},{"key":"1321_CR17","doi-asserted-by":"crossref","unstructured":"Nallapati, R., Cohen, W., & Lafferty, J. (2007). Parallelized variational EM for latent dirichlet allocation: An experimental evaluation of speed and scalability. In 7th IEEE International Conference on Data Mining Workshops, 2007. ICDM Workshops 2007 (pp. 349\u2013354).","DOI":"10.1109\/ICDMW.2007.33"},{"key":"1321_CR18","unstructured":"Newman, N.C., Porter, A.L., Newman, D., Trumbach, C.C., & Bolan, S.D. (2012). Comparing methods to extract technical content for technological intelligence. In Technology Management for Emerging Technologies (PICMET), 2012 Proceedings of PICMET\u201912 (p. 12791285)."},{"key":"1321_CR19","unstructured":"Ni, C., Sugimoto, C., & Cronin, B. (2012). Visualizing and comparing four facets of scholarly communication: producers, artifacts, concepts, and gatekeepers. Scientometrics pp. 1\u201313."},{"issue":"1\u20132","key":"1321_CR20","first-page":"703710","volume":"3","author":"A Smola","year":"2010","unstructured":"Smola, A., & Narayanamurthy, S. (2010). An architecture for parallel topic models. Proceedings of the VLDB Endowment, 3(1\u20132), 703710.","journal-title":"Proceedings of the VLDB Endowment"},{"key":"1321_CR21","doi-asserted-by":"crossref","unstructured":"Steyvers, M., Smyth, P., Rosen-Zvi, M., & Griffiths, T. (2004). Probabilistic author-topic models for information discovery. In Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 306\u2013315).","DOI":"10.1145\/1014052.1014087"},{"key":"1321_CR22","doi-asserted-by":"crossref","unstructured":"Suominen, A. (2013). Analysis of technological progression by quantitative measures: A comparison of two technologies. Technology Analysis & Strategic Management, 25(6), 687\u2013706.","DOI":"10.1080\/09537325.2013.802930"},{"issue":"476","key":"1321_CR23","doi-asserted-by":"crossref","first-page":"1566","DOI":"10.1198\/016214506000000302","volume":"101","author":"YW Teh","year":"2006","unstructured":"Teh, Y. W., Jordan, M. I., Beal, M. J., & Blei, D. M. (2006). Hierarchical dirichlet processes. Journal of the American Statistical Association, 101(476), 1566\u20131581.","journal-title":"Journal of the American Statistical Association"},{"key":"1321_CR24","doi-asserted-by":"crossref","unstructured":"Wallach, H. (2006). Topic modeling: Beyond bag-of-words. In In Proceedings of the 23rd International Conference on Machine Learning (p. 977984). Pittsburgh, Pennsylvania, U.S.","DOI":"10.1145\/1143844.1143967"},{"key":"1321_CR25","doi-asserted-by":"crossref","unstructured":"Wang, Y., Bai, H., Stanton, M., Chen, W.Y., & Chang, E.Y. (2009). Plda: Parallel latent dirichlet allocation for large-scale applications. In Algorithmic Aspects in Information and Management (p. 301314). Springer.","DOI":"10.1007\/978-3-642-02158-9_26"},{"key":"1321_CR26","doi-asserted-by":"crossref","unstructured":"Wei, X., & Croft, W.B. (2006). Lda-based document models for ad-hoc retrieval. In Proceedings of the 29th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (p. 178185).","DOI":"10.1145\/1148170.1148204"},{"key":"1321_CR27","doi-asserted-by":"crossref","unstructured":"Yan, E., Ding, Y., & Jacob, E.K. (2012). Overlaying communities and topics: An analysis on publication networks. Scientometrics pp. 1\u201315.","DOI":"10.1007\/s11192-011-0531-6"},{"key":"1321_CR28","unstructured":"Zhai, K., Boyd-Graber, J., Asadi, N., & Alkhouja, M.L. (2012). Mr. LDA: A flexible large scale topic modeling package using variational inference in MapReduce. In Proceedings of the 21st international conference on World Wide Web (p. 879888)."},{"key":"1321_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Porter, A. L., Hu, Z., Guo, Y., & Newman, N. C. (2014). \u201cTerm clumping\u201d for technical intelligence: A case study on dye-sensitized solar cells. Technological Forecasting and Social Change. doi: 10.1016\/j.techfore.2013.12.019 .","DOI":"10.1016\/j.techfore.2013.12.019"}],"container-title":["Scientometrics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-014-1321-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11192-014-1321-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-014-1321-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,26]],"date-time":"2024-05-26T11:51:06Z","timestamp":1716724266000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11192-014-1321-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,5,6]]},"references-count":29,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2014,9]]}},"alternative-id":["1321"],"URL":"https:\/\/doi.org\/10.1007\/s11192-014-1321-8","relation":{},"ISSN":["0138-9130","1588-2861"],"issn-type":[{"value":"0138-9130","type":"print"},{"value":"1588-2861","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,5,6]]}}}