{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,5]],"date-time":"2025-06-05T04:14:24Z","timestamp":1749096864748,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":23,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811022081"},{"type":"electronic","value":"9789811022098"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-981-10-2209-8_9","type":"book-chapter","created":{"date-parts":[[2016,8,8]],"date-time":"2016-08-08T02:39:56Z","timestamp":1470623996000},"page":"97-107","source":"Crossref","is-referenced-by-count":3,"title":["GLDA: Parallel Gibbs Sampling for Latent Dirichlet Allocation on GPU"],"prefix":"10.1007","author":[{"given":"Pei","family":"Xue","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kezhao","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiankun","family":"Dong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenjing","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,8,9]]},"reference":[{"key":"9_CR1","unstructured":"Nvidia cuda. http:\/\/www.nvidia.com\/cuda"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Aila, T., Laine, S.: Understanding the efficiency of ray traversal on GPUs. In: Proceedings of the Conference on High Performance Graphics 2009, pp. 145\u2013149. ACM (2009)","DOI":"10.1145\/1572769.1572792"},{"key":"9_CR3","doi-asserted-by":"crossref","unstructured":"Blei, D.M.: Introduction to probabilistic topicmodels. http:\/\/www.cs.princeton.edu\/blei\/papers\/Blei2011.pdf","DOI":"10.1145\/2107736.2107741"},{"key":"9_CR4","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei, D.M., Ng, A.Y., Jordan, M.I.: Latent dirichlet allocation. J. Mach. Learn. Res. 3, 993\u20131022 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Chen, W.Y., Chu, J.C., Luan, J., Bai, H., Wang, Y., Chang, E.Y.: Collaborative filtering for orkut communities: discovery of user latent behavior. In: Proceedings of the 18th international conference on World wide web, pp. 681\u2013690. ACM (2009)","DOI":"10.1145\/1526709.1526801"},{"key":"9_CR6","unstructured":"Cook, S.: CUDA programming: a developer\u2019s guide to parallel computing with GPUs. Newnes (2012)"},{"issue":"5","key":"9_CR7","first-page":"601","volume":"16","author":"E Wu","year":"2004","unstructured":"Wu, E., Liu, Y.: General calculation based on graphics processing unit (in Chinese). J. Comput. Aided Des. Comput. Graph. 16(5), 601\u2013612 (2004)","journal-title":"J. Comput. Aided Des. Comput. Graph."},{"issue":"12","key":"9_CR8","first-page":"60","volume":"33","author":"H Zhang","year":"2005","unstructured":"Zhang, H., Li, L., Lan, L.: Research on the application of the general calculation of GPU (in Chinese). Comput. Digit. Eng. 33(12), 60\u201362 (2005)","journal-title":"Comput. Digit. Eng."},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Leischner, N., Osipov, V., Sanders, P.: GPU sample sort. In: 2010 IEEE International Symposium on Parallel & Distributed Processing (IPDPS), pp. 1\u201310. IEEE (2010)","DOI":"10.1109\/IPDPS.2010.5470444"},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Li, T., Liu, X., Dong, Q., Ma, W., Wang, K.: HPSVM: Heterogeneous parallel SVM with factorization based ipm algorithm on CPU-GPU cluster. In: 2016 24th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing (PDP), pp. 74\u201381. IEEE (2016)","DOI":"10.1109\/PDP.2016.29"},{"key":"9_CR11","series-title":"Communications in Computer and Information Science","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1007\/978-3-662-44491-7_13","volume-title":"Advanced Computer Architecture","author":"T Li","year":"2014","unstructured":"Li, T., Wang, D., Zhang, S., Yang, Y.: Parallel rank coherence in networks for inferring disease phenotype and gene set associations. In: Wu, J., Chen, H., Wang, X. (eds.) ACA 2014. CCIS, vol. 451, pp. 163\u2013176. Springer, Heidelberg (2014)"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Liu, X., Zeng, J., Yang, X., Yan, J., Yang, Q.: Scalable parallel em algorithms for latent dirichlet allocation in multi-core systems. In: Proceedings of the 24th International Conference on World Wide Web, pp. 669\u2013679. International World Wide Web Conferences Steering Committee (2015)","DOI":"10.1145\/2736277.2741106"},{"issue":"3","key":"9_CR13","first-page":"26","volume":"2","author":"Z Liu","year":"2011","unstructured":"Liu, Z., Zhang, Y., Chang, E.Y., Sun, M.: Plda+: parallel latent dirichlet allocation with data placement and pipeline processing. ACM Trans. Intell. Syst. Technol. (TIST) 2(3), 26 (2011)","journal-title":"ACM Trans. Intell. Syst. Technol. (TIST)"},{"key":"9_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1007\/978-3-642-02568-6_50","volume-title":"Next-Generation Applied Intelligence","author":"T Masada","year":"2009","unstructured":"Masada, T., Hamada, T., Shibata, Y., Oguri, K.: Accelerating collapsed variational Bayesian inference for latent dirichlet allocation with nvidia CUDA compatible devices. In: Chien, B.C., Hong, T.P., Chen, S.M., Ali, M. (eds.) IEA\/AIE 2009. LNCS, vol. 5579, pp. 491\u2013500. Springer, Heidelberg (2009)"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Nallapati, R.M., Ahmed, A., Xing, E.P., Cohen, W.W.: Joint latent topic models for text and citations. In: Proceedings of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining, pp. 542\u2013550. ACM (2008)","DOI":"10.1145\/1401890.1401957"},{"key":"9_CR16","unstructured":"Newman, D., Smyth, P., Welling, M., Asuncion, A.U.: Distributed inference for latent dirichlet allocation. In: Advances in Neural Information Processing Systems, pp. 1081\u20131088 (2007)"},{"key":"9_CR17","unstructured":"Smyth, P., Welling, M., Asuncion, A.U.: Asynchronous distributed learning of topic models. In: Advances in Neural Information Processing Systems. pp. 81\u201388 (2009)"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Tang, J., Huo, R., Yao, J.: Evaluation of stability and similarity of latent dirichlet allocation. In: Software Engineering (WCSE), 2013 Fourth World Congress on. pp. 78\u201383. IEEE (2013)","DOI":"10.1109\/WCSE.2013.17"},{"key":"9_CR19","doi-asserted-by":"crossref","unstructured":"Tora, S., Eguchi, K.: Mpi\/openmp hybrid parallel inference for latent dirichlet allocation. In: Proceedings of the Third Workshop on Large Scale Data Mining: Theory and Applications. pp. 5. ACM (2011)","DOI":"10.1145\/2002945.2002950"},{"key":"9_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1007\/978-3-642-02158-9_26","volume-title":"Algorithmic Aspects in Information and Management","author":"Y Wang","year":"2009","unstructured":"Wang, Y., Bai, H., Stanton, M., Chen, W.Y., Chang, E.Y.: PLDA: Parallel Latent Dirichlet Allocation for Large-Scale Applications. In: Goldberg, A.V., Zhou, Y. (eds.) AAIM 2009. LNCS, vol. 5564, pp. 301\u2013314. Springer, Heidelberg (2009)"},{"key":"9_CR21","unstructured":"Yan, F., Xu, N., Qi, Y.: Parallel inference for latent dirichlet allocation on graphics processing units. In: Advances in Neural Information Processing Systems. pp. 2134\u20132142 (2009)"},{"issue":"1","key":"9_CR22","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s00500-014-1376-8","volume":"19","author":"JF Yan","year":"2015","unstructured":"Yan, J.F., Zeng, J., Gao, Y., Liu, Z.Q.: Communication-efficient algorithms for parallel latent dirichlet allocation. Soft Computing 19(1), 3\u201311 (2015)","journal-title":"Soft Computing"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, S., Li, T., Dong, Q., Liu, X., Yang, Y.: Cpu-assisted gpu thread pool model for dynamic task parallelism. In: Networking, Architecture and Storage (NAS), 2015 IEEE International Conference on. pp. 135\u2013140. IEEE (2015)","DOI":"10.1109\/NAS.2015.7255234"}],"container-title":["Communications in Computer and Information Science","Advanced Computer Architecture"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-10-2209-8_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T15:57:22Z","timestamp":1749052642000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-10-2209-8_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9789811022081","9789811022098"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-981-10-2209-8_9","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2016]]}}}