{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T14:24:25Z","timestamp":1768400665659,"version":"3.49.0"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2012,2,29]],"date-time":"2012-02-29T00:00:00Z","timestamp":1330473600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Scientometrics"],"published-print":{"date-parts":[[2012,11]]},"DOI":"10.1007\/s11192-012-0681-1","type":"journal-article","created":{"date-parts":[[2012,2,28]],"date-time":"2012-02-28T09:29:28Z","timestamp":1330421368000},"page":"391-411","source":"Crossref","is-referenced-by-count":44,"title":["A boosted-trees method for name disambiguation"],"prefix":"10.1007","volume":"93","author":[{"given":"Jian","family":"Wang","sequence":"first","affiliation":[]},{"given":"Kaspars","family":"Berzins","sequence":"additional","affiliation":[]},{"given":"Diana","family":"Hicks","sequence":"additional","affiliation":[]},{"given":"Julia","family":"Melkers","sequence":"additional","affiliation":[]},{"given":"Fang","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Diogo","family":"Pinheiro","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,2,29]]},"reference":[{"issue":"5","key":"681_CR1","doi-asserted-by":"crossref","first-page":"838","DOI":"10.1002\/asi.20788","volume":"59","author":"DW Aksnes","year":"2008","unstructured":"Aksnes, D. W. (2008). When different persons have an identical author name. How frequent are homonyms? 
Journal of the American Society for Information Science and Technology, 59(5), 838\u2013841.","journal-title":"Journal of the American Society for Information Science and Technology"},{"key":"681_CR2","unstructured":"Aswani, N., Bontcheva, K., & Cunningham, H. (2006). Mining information for instance unification. In I. Cruz, S. Decker, D. Allemang, C. Preist, D. Schwabe, & P. Mika, et al. (Eds.), The Semantic Web\u2014ISWC 2006. Lecture Notes in Computer Science. (Vol. 4273, pp. 329\u2013342). Berlin: Springer."},{"key":"681_CR3","first-page":"47","volume-title":"Proceedings of the SIAM 6th International Conference on Data Mining","author":"I Bhattacharya","year":"2006","unstructured":"Bhattacharya, I., & Getoor, L. (2006). A latent dirichlet model for unsupervised entity resolution. In J. Ghosh, D. Lambert, D. Skillicorn, & J. Srivastava (Eds.), Proceedings of the SIAM 6th International Conference on Data Mining (pp. 47\u201358). Bethesda, MD: Society for Industrial Mathematics."},{"issue":"1","key":"681_CR4","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1217299.1217300","volume":"1","author":"I Bhattacharya","year":"2007","unstructured":"Bhattacharya, I., & Getoor, L. (2007). Collective entity resolution in relational data. ACM Transactions on Knowledge Discovery from Data (TKDD), 1(1), 1\u201336.","journal-title":"ACM Transactions on Knowledge Discovery from Data (TKDD)"},{"key":"681_CR5","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei, D. M., Ng, A. Y., & Jordan, M. I. (2003). Latent dirichlet allocation. The Journal of Machine Learning Research, 3, 993\u20131022.","journal-title":"The Journal of Machine Learning Research"},{"key":"681_CR6","volume-title":"Classification and regression trees","author":"L Breiman","year":"1984","unstructured":"Breiman, L. (1984). Classification and regression trees. 
Boca Raton, FL: Chapman & Hall\/CRC."},{"issue":"2","key":"681_CR7","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1023\/A:1009715923555","volume":"2","author":"CJC Burges","year":"1998","unstructured":"Burges, C. J. C. (1998). A tutorial on support vector machines for pattern recognition. Data Mining and Knowledge Discovery, 2(2), 121\u2013167.","journal-title":"Data Mining and Knowledge Discovery"},{"key":"681_CR8","first-page":"578","volume":"6","author":"FJ Cole","year":"1917","unstructured":"Cole, F. J., & Eales, N. B. (1917). The history of comparative anatomy: Part 1.-a statistical analysis of the literature. Science Progress in the Twentieth Century, 6, 578\u2013597.","journal-title":"Science Progress in the Twentieth Century"},{"issue":"9","key":"681_CR9","doi-asserted-by":"crossref","first-page":"1853","DOI":"10.1002\/asi.21363","volume":"61","author":"RG Cota","year":"2010","unstructured":"Cota, R. G., Ferreira, A. A., Nascimento, C., Goncalves, M. A., & Laender, A. H. F. (2010). An unsupervised heuristic-based hierarchical method for name disambiguation in bibliographic citations. Journal of the American Society for Information Science and Technology, 61(9), 1853\u20131870.","journal-title":"Journal of the American Society for Information Science and Technology"},{"key":"681_CR10","unstructured":"Culotta, A., Kanani, P., Hall, R., Wick, M., & McCallum, A. Author disambiguation using error-driven machine learning with a ranking loss function. In 6th International Workshop on Information Integration on the Web (IIWeb-07), Vancouver, Canada, 23 July 2007."},{"key":"681_CR11","unstructured":"Culp, M., Johnson, K., & Michailidis, G. (2010). ada: An R package for stochastic boosting. http:\/\/CRAN.R-project.org\/package=ada . Accessed 01 Aug 2011."},{"issue":"2","key":"681_CR12","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1002\/asi.21460","volume":"62","author":"CA D\u2019Angelo","year":"2011","unstructured":"D\u2019Angelo, C. 
A., Giuffrida, C., & Abramo, G. (2011). A heuristic approach to author name disambiguation in bibliometrics databases for large-scale research assessments. Journal of the American Society for Information Science and Technology, 62(2), 257\u2013269.","journal-title":"Journal of the American Society for Information Science and Technology"},{"issue":"2","key":"681_CR13","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1214\/aos\/1016218223","volume":"28","author":"J Friedman","year":"2000","unstructured":"Friedman, J., Hastie, T., & Tibshirani, R. (2000). Special invited paper. additive logistic regression: A statistical view of boosting. The Annals of Statistics, 28(2), 337\u2013374.","journal-title":"The Annals of Statistics"},{"key":"681_CR14","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1145\/996350.996419","volume-title":"Proceedings of the 4th ACM\/IEEE-CS Joint Conference on Digital Libraries","author":"H Han","year":"2004","unstructured":"Han, H., Giles, L., Zha, H., Li, C., & Tsioutsiouliklis, K. (2004). Two supervised learning approaches for name disambiguation in author citations. In H. Chen, H. Wactlar, C.-c. Chen, E.-P. Lim, & M. Christel (Eds.), Proceedings of the 4th ACM\/IEEE-CS Joint Conference on Digital Libraries (pp. 296\u2013305). New York: ACM."},{"key":"681_CR15","doi-asserted-by":"crossref","first-page":"1065","DOI":"10.1145\/1066677.1066920","volume-title":"Proceedings of the 2005 ACM Symposium on Applied Computing","author":"H Han","year":"2005","unstructured":"Han, H., Xu, W., Zha, H., & Giles, C. L. (2005a). A hierarchical naive Bayes mixture model for name disambiguation in author citations. In H. M. Haddad, A. Omicini, R. L. Wainwright, & L. M. Liebrock (Eds.), Proceedings of the 2005 ACM Symposium on Applied Computing (pp. 1065\u20131069). 
New York: ACM."},{"key":"681_CR16","doi-asserted-by":"crossref","first-page":"334","DOI":"10.1145\/1065385.1065462","volume-title":"Proceedings of the 5th ACM\/IEEE-CS Joint Conference on Digital Libraries","author":"H Han","year":"2005","unstructured":"Han, H., Zha, H., & Giles, C. L. (2005b). Name disambiguation in author citations using a K-way spectral clustering method. In M. Marlino, T. Sumner, & F. Shipman (Eds.), Proceedings of the 5th ACM\/IEEE-CS Joint Conference on Digital Libraries (pp. 334\u2013343). New York: ACM."},{"key":"681_CR17","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-84858-7","volume-title":"The elements of statistical learning: data mining, inference, and prediction","author":"T Hastie","year":"2009","unstructured":"Hastie, T., Tibshirani, R., & Friedman, J. H. (2009). The elements of statistical learning: data mining, inference, and prediction (2nd ed.). New York: Springer.","edition":"2"},{"issue":"46","key":"681_CR18","doi-asserted-by":"crossref","first-page":"16569","DOI":"10.1073\/pnas.0507655102","volume":"102","author":"JE Hirsch","year":"2005","unstructured":"Hirsch, J. E. (2005). An index to quantify an individual\u2019s scientific research output. Proceedings of the National Academy of Sciences of the United States of America, 102(46), 16569\u201316572. doi: 10.1073\/pnas.0507655102 .","journal-title":"Proceedings of the National Academy of Sciences of the United States of America"},{"key":"681_CR19","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1145\/312624.312649","volume-title":"Proceedings of the 22nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"T Hofmann","year":"1999","unstructured":"Hofmann, T. (1999). Probabilistic latent semantic indexing. In F. Gey, M. Hearst, & R. Tong (Eds.), Proceedings of the 22nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (pp. 50\u201357). 
New York: ACM."},{"key":"681_CR20","doi-asserted-by":"crossref","first-page":"536","DOI":"10.1007\/11871637_53","volume":"4213","author":"J Huang","year":"2006","unstructured":"Huang, J., Ertekin, S., & Giles, C. (2006). Efficient name disambiguation for large-scale databases. Knowledge Discovery in Databases: PKDD 2006, 4213, 536\u2013544.","journal-title":"Knowledge Discovery in Databases: PKDD 2006"},{"key":"681_CR21","volume-title":"Applied multivariate statistical analysis","author":"RA Johnson","year":"2007","unstructured":"Johnson, R. A., & Wichern, D. W. (2007). Applied multivariate statistical analysis (6th ed.). Upper Saddle River, NJ: Pearson Prentice Hall.","edition":"6"},{"key":"681_CR22","unstructured":"Kanani, P., & McCallum, A. Efficient strategies for improving partitioning-based author coreference by incorporating Web pages as graph nodes. In 6th International Workshop on Information Integration on the Web (IIWeb-07), Vol. 23, Vancouver, Canada, 23 July 2007."},{"key":"681_CR23","unstructured":"Kanani, P., McCallum, A., & Pal, C. Improving author coreference by resource-bounded information gathering from the web. In 20th International Joint Conference on Artificial Intelligence (IJCAI), Hyderabad, India, 6\u201312 Jan 2007 (pp. 429\u2013434). Hyderabad: AAAI Press."},{"issue":"1","key":"681_CR24","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1016\/j.ipm.2008.06.006","volume":"45","author":"IS Kang","year":"2009","unstructured":"Kang, I. S., Na, S. H., Lee, S., Jung, H., Kim, P., Sung, W. K., et al. (2009). On co-authorship for author disambiguation. Information Processing and Management, 45(1), 84\u201397.","journal-title":"Information Processing and Management"},{"key":"681_CR25","first-page":"69","volume-title":"International Workshop on Information Quality in Information Systems (IQIS 2005)","author":"D Lee","year":"2005","unstructured":"Lee, D., On, B. W., Kang, J., & Park, S. (2005). 
Effective and scalable solutions for mixed and split citation problems in digital libraries. In L. Berti-Equille, C. Batini, & D. Srivastava (Eds.), International Workshop on Information Quality in Information Systems (IQIS 2005) (pp. 69\u201376). New York: ACM."},{"issue":"7","key":"681_CR26","doi-asserted-by":"crossref","first-page":"1019","DOI":"10.1002\/asi.20591","volume":"58","author":"D Liben-Nowell","year":"2007","unstructured":"Liben-Nowell, D., & Kleinberg, J. (2007). The link prediction problem for social networks. Journal of the American Society for Information Science and Technology, 58(7), 1019\u20131031.","journal-title":"Journal of the American Society for Information Science and Technology"},{"key":"681_CR27","unstructured":"McCallum, A., & Wellner, B. Object consolidation by graph partitioning with a conditionally-trained distance metric. In KDD Workshop on Data Cleaning, Record Linkage and Object Consolidation, Washington, DC, 24\u201327 Aug 2003. Washington, DC: Citeseer."},{"key":"681_CR28","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1145\/1141753.1141762","volume-title":"Proceedings of the 6th ACM\/IEEE-CS Joint Conference on Digital Libraries","author":"DM McRae-Spencer","year":"2006","unstructured":"McRae-Spencer, D. M., & Shadbolt, N. R. (2006). Also by the same author: AKTiveAuthor, a citation graph approach to name disambiguation. In G. Marchionini, M. L. Nelson, & C. C. Marshall (Eds.), Proceedings of the 6th ACM\/IEEE-CS Joint Conference on Digital Libraries (pp. 53\u201354). New York: ACM."},{"key":"681_CR29","volume-title":"Citation analysis in research evaluation","author":"HF Moed","year":"2005","unstructured":"Moed, H. F. (2005). Citation analysis in research evaluation. Dordrecht: Springer."},{"issue":"2","key":"681_CR30","doi-asserted-by":"crossref","first-page":"404","DOI":"10.1073\/pnas.98.2.404","volume":"98","author":"MEJ Newman","year":"2001","unstructured":"Newman, M. E. J. (2001). 
The structure of scientific collaboration networks. Proceedings of the National Academy of Sciences of the United States of America, 98(2), 404.","journal-title":"Proceedings of the National Academy of Sciences of the United States of America"},{"key":"681_CR31","doi-asserted-by":"crossref","first-page":"344","DOI":"10.1145\/1065385.1065463","volume-title":"Proceedings of the 5th ACM\/IEEE-CS Joint Conference on Digital Libraries","author":"BW On","year":"2005","unstructured":"On, B. W., Lee, D., Kang, J., Mitra, P., & Acm, (2005). Comparative study of name disambiguation problem using a scalable blocking-based framework. In M. Marlino, T. Sumner, & F. Shipman (Eds.), Proceedings of the 5th ACM\/IEEE-CS Joint Conference on Digital Libraries (pp. 344\u2013353). New York: ACM."},{"issue":"4","key":"681_CR32","doi-asserted-by":"crossref","first-page":"677","DOI":"10.1002\/asi.21491","volume":"62","author":"N Onodera","year":"2011","unstructured":"Onodera, N., Iwasawa, M., Midorikawa, N., Yoshikane, F., Amano, K., Ootani, Y., et al. (2011). A method for eliminating articles by homonymous authors from the large number of articles retrieved by author search. Journal of the American Society for Information Science and Technology, 62(4), 677\u2013690. doi: 10.1002\/asi.21491 .","journal-title":"Journal of the American Society for Information Science and Technology"},{"issue":"3","key":"681_CR33","doi-asserted-by":"crossref","first-page":"719","DOI":"10.1007\/s11192-008-2197-2","volume":"81","author":"A Porter","year":"2009","unstructured":"Porter, A., & Rafols, I. (2009). Is science becoming more interdisciplinary? Measuring and mapping six research fields over time. Scientometrics, 81(3), 719\u2013745. 
doi: 10.1007\/s11192-008-2197-2 .","journal-title":"Scientometrics"},{"issue":"5","key":"681_CR34","doi-asserted-by":"crossref","first-page":"056103","DOI":"10.1103\/PhysRevE.80.056103","volume":"80","author":"F Radicchi","year":"2009","unstructured":"Radicchi, F., Fortunato, S., Markines, B., & Vespignani, A. (2009). Diffusion of scientific credits and the ranking of scientists. Physical Review E, 80(5), 056103.","journal-title":"Physical Review E"},{"key":"681_CR35","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1002\/aris.2009.1440430113","volume":"43","author":"NR Smalheiser","year":"2009","unstructured":"Smalheiser, N. R., & Torvik, V. I. (2009). Author Name Disambiguation. Annual Review of Information Science and Technology, 43, 287\u2013313.","journal-title":"Annual Review of Information Science and Technology"},{"key":"681_CR36","doi-asserted-by":"crossref","first-page":"342","DOI":"10.1145\/1255175.1255243","volume-title":"Proceedings of the 7th ACM\/IEEE-CS Joint Conference on Digital Libraries","author":"Y Song","year":"2007","unstructured":"Song, Y., Huang, J., Councill, I. G., Li, J., & Giles, C. L. (2007). Efficient topic-based unsupervised name disambiguation. In E. Rasmussen, R. R. Larson, E. Toms, & S. Sugimoto (Eds.), Proceedings of the 7th ACM\/IEEE-CS Joint Conference on Digital Libraries (pp. 342\u2013351). New York: ACM."},{"issue":"1","key":"681_CR37","first-page":"1","volume":"46","author":"A Strotmann","year":"2009","unstructured":"Strotmann, A., Zhao, D., & Bubela, T. (2009). Author name disambiguation for collaboration network analysis and visualization. Proceedings of the American Society for Information Science and Technology, 46(1), 1\u201320. 
doi: 10.1002\/meet.2009.1450460218 .","journal-title":"Proceedings of the American Society for Information Science and Technology"},{"key":"681_CR38","doi-asserted-by":"crossref","first-page":"314","DOI":"10.1145\/1141753.1141826","volume-title":"Proceedings of the 6th ACM\/IEEE-CS Joint Conference on Digital Libraries","author":"YF Tan","year":"2006","unstructured":"Tan, Y. F., Kan, M. Y., & Lee, D. (2006). Search engine driven author disambiguation. In G. Marchionini, M. L. Nelson, & C. C. Marshall (Eds.), Proceedings of the 6th ACM\/IEEE-CS Joint Conference on Digital Libraries (pp. 314\u2013315). New York: ACM."},{"issue":"3","key":"681_CR39","doi-asserted-by":"crossref","first-page":"763","DOI":"10.1007\/s11192-010-0196-6","volume":"84","author":"L Tang","year":"2010","unstructured":"Tang, L., & Walsh, J. P. (2010). Bibliometric fingerprints: name disambiguation based on approximate structure equivalence of cognitive maps. Scientometrics, 84(3), 763\u2013784. doi: 10.1007\/s11192-010-0196-6 .","journal-title":"Scientometrics"},{"key":"681_CR40","unstructured":"Therneau, T. M., & Atkinson, B. (2010). rpart: Recursive partitioning. http:\/\/CRAN.R-project.org\/package=rpart . Accessed 01 Aug 2011."},{"issue":"3","key":"681_CR41","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1552303.1552304","volume":"3","author":"VI Torvik","year":"2009","unstructured":"Torvik, V. I., & Smalheiser, N. R. (2009). Author name disambiguation in MEDLINE. ACM Transactions on Knowledge Discovery from Data (TKDD), 3(3), 1\u201329.","journal-title":"ACM Transactions on Knowledge Discovery from Data (TKDD)"},{"issue":"2","key":"681_CR42","doi-asserted-by":"crossref","first-page":"140","DOI":"10.1002\/asi.20105","volume":"56","author":"VI Torvik","year":"2005","unstructured":"Torvik, V. I., Weeber, M., Swanson, D. R., & Smalheiser, N. R. (2005). A probabilistic similarity metric for Medline records: A model for author name disambiguation. 
Journal of the American Society for Information Science and Technology, 56(2), 140\u2013158. doi: 10.1002\/asi.20105 .","journal-title":"Journal of the American Society for Information Science and Technology"},{"key":"681_CR43","unstructured":"U.S. Census Bureau (2000). Frequently occurring surnames from Census 2000. http:\/\/www.census.gov\/genealogy\/www\/data\/2000surnames\/index.html . Accessed 01 Aug 2011."},{"issue":"1","key":"681_CR44","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1007\/s11192-006-0002-7","volume":"66","author":"S Wooding","year":"2006","unstructured":"Wooding, S., Wilcox-Jay, K., Lewison, G., & Grant, J. (2006). Co-author inclusion: A novel recursive algorithmic method for dealing with homonyms in bibliometric analysis. Scientometrics, 66(1), 11\u201321.","journal-title":"Scientometrics"},{"key":"681_CR45","unstructured":"Yang, K. H., Jiang, J. Y., Lee, H. M., & Ho, J. M. (2006). Extracting citation relationships from web documents for author disambiguation. Taipei: Technical Report (TR-IIS-06-017)."},{"key":"681_CR46","unstructured":"Yin, X., Han, J., & Yu, P. S. (2007). Object distinction: Distinguishing objects with identical names. In Proceedings of the 2007 IEEE 23rd International Conference on Data Engineering Workshop (pp. 1242\u20131246). 
Washington, DC: IEEE."}],"container-title":["Scientometrics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-012-0681-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11192-012-0681-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-012-0681-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,31]],"date-time":"2021-12-31T17:36:06Z","timestamp":1640972166000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11192-012-0681-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,2,29]]},"references-count":46,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2012,11]]}},"alternative-id":["681"],"URL":"https:\/\/doi.org\/10.1007\/s11192-012-0681-1","relation":{},"ISSN":["0138-9130","1588-2861"],"issn-type":[{"value":"0138-9130","type":"print"},{"value":"1588-2861","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,2,29]]}}}