{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T06:09:13Z","timestamp":1769234953276,"version":"3.49.0"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,7,27]],"date-time":"2018-07-27T00:00:00Z","timestamp":1532649600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,7,27]],"date-time":"2018-07-27T00:00:00Z","timestamp":1532649600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1561687"],"award-info":[{"award-number":["1561687"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1535370"],"award-info":[{"award-number":["1535370"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000879","name":"Alfred P. Sloan Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000879","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000868","name":"Ewing Marion Kauffman Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000868","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Scientometrics"],"published-print":{"date-parts":[[2018,10]]},"DOI":"10.1007\/s11192-018-2865-9","type":"journal-article","created":{"date-parts":[[2018,7,27]],"date-time":"2018-07-27T05:09:27Z","timestamp":1532668167000},"page":"511-526","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":41,"title":["The impact of imbalanced training data on machine learning for author name disambiguation"],"prefix":"10.1007","volume":"117","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6481-2065","authenticated-orcid":false,"given":"Jinseok","family":"Kim","sequence":"first","affiliation":[]},{"given":"Jenna","family":"Kim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,7,27]]},"reference":[{"key":"2865_CR1","unstructured":"Bagga, A., & Baldwin, B. (1998). Algorithms for scoring coreference chains. Paper presented at the first international conference on language resources and evaluation workshop on linguistics coreference."},{"issue":"3\u20134","key":"2865_CR2","doi-asserted-by":"publisher","first-page":"590","DOI":"10.1016\/S0378-4371(02)00736-7","volume":"311","author":"A-L Barab\u00e1si","year":"2002","unstructured":"Barab\u00e1si, A.-L., Jeong, H., Neda, Z., Ravasz, E., Schubert, A., & Vicsek, T. (2002). Evolution of the social network of scientific collaborations. Physica A-Statistical Mechanics and Its Applications, 311(3\u20134), 590\u2013614.","journal-title":"Physica A-Statistical Mechanics and Its Applications"},{"key":"2865_CR3","first-page":"2137","volume":"10","author":"S Bickel","year":"2009","unstructured":"Bickel, S., Bruckner, M., & Scheffer, T. (2009). Discriminative learning under covariate shift. Journal of Machine Learning Research, 10, 2137\u20132155.","journal-title":"Journal of Machine Learning Research"},{"key":"2865_CR4","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla, N. V., Bowyer, K. W., Hall, L. O., & Kegelmeyer, W. P. (2002). SMOTE: Synthetic minority over-sampling technique. Journal of Artificial Intelligence Research, 16, 321\u2013357.","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"9","key":"2865_CR5","doi-asserted-by":"publisher","first-page":"1853","DOI":"10.1002\/asi.21363","volume":"61","author":"RG Cota","year":"2010","unstructured":"Cota, R. G., Ferreira, A. A., Nascimento, C., Goncalves, M. A., & Laender, A. H. F. (2010). An unsupervised heuristic-based hierarchical method for name disambiguation in bibliographic citations. Journal of the American Society for Information Science and Technology, 61(9), 1853\u20131870.","journal-title":"Journal of the American Society for Information Science and Technology"},{"issue":"7","key":"2865_CR6","doi-asserted-by":"publisher","first-page":"e70299","DOI":"10.1371\/journal.pone.0070299","volume":"8","author":"BD Fegley","year":"2013","unstructured":"Fegley, B. D., & Torvik, V. I. (2013). Has large-scale named-entity network analysis been resting on a flawed assumption? PLoS ONE, 8(7), e70299.","journal-title":"PLoS ONE"},{"issue":"2","key":"2865_CR7","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1145\/2350036.2350040","volume":"41","author":"AA Ferreira","year":"2012","unstructured":"Ferreira, A. A., Goncalves, M. A., & Laender, A. H. F. (2012). A brief survey of automatic methods for author name disambiguation. Sigmod Record, 41(2), 15\u201326.","journal-title":"Sigmod Record"},{"issue":"6","key":"2865_CR8","doi-asserted-by":"publisher","first-page":"1257","DOI":"10.1002\/asi.22992","volume":"65","author":"AA Ferreira","year":"2014","unstructured":"Ferreira, A. A., Veloso, A., Goncalves, M. A., & Laender, A. H. F. (2014). Self-training author name disambiguation for information scarce scenarios. Journal of the Association for Information Science and Technology, 65(6), 1257\u20131278.","journal-title":"Journal of the Association for Information Science and Technology"},{"issue":"5207","key":"2865_CR9","doi-asserted-by":"publisher","first-page":"763","DOI":"10.1038\/223763b0","volume":"223","author":"E Garfield","year":"1969","unstructured":"Garfield, E. (1969). British quest for uniqueness versus American egocentrism. Nature, 223(5207), 763.","journal-title":"Nature"},{"key":"2865_CR10","unstructured":"Han, H., Giles, L., Zha, H., Li, C., & Tsioutsiouliklis, K. (2004). Two supervised learning approaches for name disambiguation in author citations. In JCDL 2004: Proceedings of the Fourth ACM\/IEEE joint conference on digital libraries (pp. 296\u2013305)."},{"key":"2865_CR11","doi-asserted-by":"crossref","unstructured":"Han, H., Xu, W., Zha, H., & Giles, C. L. (2005). A hierarchical naive Bayes mixture model for name disambiguation in author citations. Paper presented at the proceedings of the 2005 ACM symposium on Applied computing\u2014SAC\u201905, Santa Fe, NM.","DOI":"10.1145\/1066677.1066920"},{"key":"2865_CR12","doi-asserted-by":"crossref","unstructured":"Han, H., Zha, H. Y., & Giles, C. L. (2005). Name disambiguation spectral in author citations using a K-way clustering method. In Proceedings of the 5th ACM\/IEEE joint conference on digital libraries (pp. 334\u2013343).","DOI":"10.1145\/1065385.1065462"},{"issue":"9","key":"2865_CR13","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He, H., & Garcia, E. A. (2009). Learning from imbalanced data. IEEE Transactions on Knowledge and Data Engineering, 21(9), 1263\u20131284.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"7","key":"2865_CR14","doi-asserted-by":"publisher","first-page":"1595","DOI":"10.1021\/ci4002712","volume":"53","author":"K Heikamp","year":"2013","unstructured":"Heikamp, K., & Bajorath, J. (2013). Comparison of confirmed inactive and randomly selected compounds as negative training examples in support vector machine-based virtual screening. Journal of Chemical Information and Modeling, 53(7), 1595\u20131601.","journal-title":"Journal of Chemical Information and Modeling"},{"issue":"3","key":"2865_CR15","doi-asserted-by":"publisher","first-page":"452","DOI":"10.1016\/j.ipm.2010.10.001","volume":"47","author":"IS Kang","year":"2011","unstructured":"Kang, I. S., Kim, P., Lee, S., Jung, H., & You, B. J. (2011). Construction of a large-scale test set for author disambiguation. Information Processing and Management, 47(3), 452\u2013465.","journal-title":"Information Processing and Management"},{"key":"2865_CR39","doi-asserted-by":"publisher","DOI":"10.1007\/s11192-018-2824-5","author":"J Kim","year":"2018","unstructured":"Kim, J. (2018). Evaluating author name disambiguation for digital libraries: A case of DBLP. Scientometrics. \n                    https:\/\/doi.org\/10.1007\/s11192-018-2824-5\n                    \n                  .","journal-title":"Scientometrics"},{"issue":"6","key":"2865_CR16","doi-asserted-by":"publisher","first-page":"1446","DOI":"10.1002\/asi.23489","volume":"67","author":"J Kim","year":"2016","unstructured":"Kim, J., & Diesner, J. (2016). Distortive effects of initial-based name disambiguation on measurements of large-scale coauthorship networks. Journal of the Association for Information Science and Technology, 67(6), 1446\u20131461.","journal-title":"Journal of the Association for Information Science and Technology"},{"key":"2865_CR17","unstructured":"Kim, K., Sefid, A., & Giles, C. L. (2017). Scaling author name disambiguation with CNF blocking. arXiv preprint \n                    arXiv:1709.09657\n                    \n                  ."},{"key":"2865_CR18","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1186\/1758-2946-6-32","volume":"6","author":"R Kurczab","year":"2014","unstructured":"Kurczab, R., Smusz, S., & Bojarski, A. J. (2014). The influence of negative training set size on machine learning-based virtual screening. Journal of Cheminformatics, 6, 32.","journal-title":"Journal of Cheminformatics"},{"issue":"5","key":"2865_CR19","doi-asserted-by":"publisher","first-page":"1030","DOI":"10.1002\/asi.22621","volume":"63","author":"M Levin","year":"2012","unstructured":"Levin, M., Krawczyk, S., Bethard, S., & Jurafsky, D. (2012). Citation-based bootstrapping for large-scale author disambiguation. Journal of the American Society for Information Science and Technology, 63(5), 1030\u20131047.","journal-title":"Journal of the American Society for Information Science and Technology"},{"key":"2865_CR20","unstructured":"Li, X.-L., Liu, B., & Ng, S.-K. (2010). Negative training data can be harmful to text classification. Paper presented at the proceedings of the 2010 conference on empirical methods in natural language processing, Cambridge, MA."},{"key":"2865_CR21","doi-asserted-by":"crossref","unstructured":"Liu, B., Dai, Y., Li, X., Lee, W. S., & Yu, P. S. (2003). Building text classifiers using positive and unlabeled examples. Paper presented at the third IEEE international conference on data mining, 2003 (ICDM 2003).","DOI":"10.1109\/ICDM.2003.1250918"},{"issue":"649","key":"2865_CR22","doi-asserted-by":"publisher","first-page":"272","DOI":"10.1007\/978-3-319-45880-9_21","volume":"2016","author":"G Louppe","year":"2016","unstructured":"Louppe, G., Al-Natsheh, H. T., Susik, M., & Maguire, E. J. (2016). Ethnicity sensitive author disambiguation using semi-supervised learning. Knowledge Engineering and Semantic Web, Kesw, 2016(649), 272\u2013287.","journal-title":"Knowledge Engineering and Semantic Web, Kesw"},{"issue":"1\u20132","key":"2865_CR23","doi-asserted-by":"publisher","first-page":"208","DOI":"10.14778\/1920841.1920871","volume":"3","author":"D Menestrina","year":"2010","unstructured":"Menestrina, D., Whang, S. E., & Garcia-Molina, H. (2010). Evaluating entity resolution results. Proceedings of the VLDB Endowment, 3(1\u20132), 208\u2013219.","journal-title":"Proceedings of the VLDB Endowment"},{"issue":"3","key":"2865_CR24","doi-asserted-by":"publisher","first-page":"1467","DOI":"10.1007\/s11192-017-2363-5","volume":"111","author":"MC Muller","year":"2017","unstructured":"Muller, M. C., Reitz, F., & Roy, N. (2017). Data sets for author name disambiguation: An empirical analysis and a new resource. Scientometrics, 111(3), 1467\u20131500.","journal-title":"Scientometrics"},{"issue":"2","key":"2865_CR25","doi-asserted-by":"publisher","first-page":"404","DOI":"10.1073\/pnas.98.2.404","volume":"98","author":"MEJ Newman","year":"2001","unstructured":"Newman, M. E. J. (2001). The structure of scientific collaboration networks. Proceedings of the National Academy of Sciences of the United States of America, 98(2), 404\u2013409.","journal-title":"Proceedings of the National Academy of Sciences of the United States of America"},{"issue":"3","key":"2865_CR26","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1108\/eb046814","volume":"14","author":"M Porter","year":"1980","unstructured":"Porter, M. (1980). An algorithm for suffix stripping. Program, 14(3), 130\u2013137.","journal-title":"Program"},{"issue":"3\u20134","key":"2865_CR27","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/s00799-015-0158-y","volume":"16","author":"AF Santana","year":"2015","unstructured":"Santana, A. F., Goncalves, M. A., Laender, A. H. F., & Ferreira, A. A. (2015). On the combination of domain-specific heuristics for author name disambiguation: The nearest cluster method. International Journal on Digital Libraries, 16(3\u20134), 229\u2013246.","journal-title":"International Journal on Digital Libraries"},{"issue":"3","key":"2865_CR28","doi-asserted-by":"publisher","first-page":"1283","DOI":"10.1007\/s11192-016-1892-7","volume":"107","author":"J Schulz","year":"2016","unstructured":"Schulz, J. (2016). Using Monte Carlo simulations to assess the impact of author name disambiguation quality on different bibliometric analyses. Scientometrics, 107(3), 1283\u20131298.","journal-title":"Scientometrics"},{"issue":"2","key":"2865_CR29","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1016\/S0378-3758(00)00115-4","volume":"90","author":"H Shimodaira","year":"2000","unstructured":"Shimodaira, H. (2000). Improving predictive inference under covariate shift by weighting the log-likelihood function. Journal of Statistical Planning and Inference, 90(2), 227\u2013244.","journal-title":"Journal of Statistical Planning and Inference"},{"issue":"1","key":"2865_CR30","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/s11192-014-1289-4","volume":"100","author":"D Shin","year":"2014","unstructured":"Shin, D., Kim, T., Choi, J., & Kim, J. (2014). Author name disambiguation using a graph model with node splitting and merging based on bibliographic information. Scientometrics, 100(1), 15\u201350.","journal-title":"Scientometrics"},{"key":"2865_CR31","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1002\/aris.2009.1440430113","volume":"43","author":"NR Smalheiser","year":"2009","unstructured":"Smalheiser, N. R., & Torvik, V. I. (2009). Author name disambiguation. Annual Review of Information Science and Technology, 43, 287\u2013313.","journal-title":"Annual Review of Information Science and Technology"},{"issue":"6","key":"2865_CR32","doi-asserted-by":"publisher","first-page":"975","DOI":"10.1109\/TKDE.2011.13","volume":"24","author":"J Tang","year":"2012","unstructured":"Tang, J., Fong, A. C. M., Wang, B., & Zhang, J. (2012). A unified probabilistic framework for name disambiguation in digital library. IEEE Transactions on Knowledge and Data Engineering, 24(6), 975\u2013987.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"3","key":"2865_CR33","doi-asserted-by":"publisher","first-page":"763","DOI":"10.1007\/s11192-010-0196-6","volume":"84","author":"L Tang","year":"2010","unstructured":"Tang, L., & Walsh, J. P. (2010). Bibliometric fingerprints: Name disambiguation based on approximate structure equivalence of cognitive maps. Scientometrics, 84(3), 763\u2013784.","journal-title":"Scientometrics"},{"issue":"3","key":"2865_CR34","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/1552303.1552304","volume":"3","author":"VI Torvik","year":"2009","unstructured":"Torvik, V. I., & Smalheiser, N. R. (2009). Author name disambiguation in MEDLINE. ACM Transactions on Knowledge Discovery from Data, 3(3), 11.","journal-title":"ACM Transactions on Knowledge Discovery from Data"},{"key":"2865_CR35","unstructured":"Treeratpituk, P., & Giles, C. L. (2009). Disambiguating authors in academic publications using Random Forests. In JCDL 2009: Proceedings of the 2009 ACM\/IEEE joint conference on digital libraries (pp. 39\u201348)."},{"issue":"2","key":"2865_CR36","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1007\/s11192-012-0681-1","volume":"93","author":"J Wang","year":"2012","unstructured":"Wang, J., Berzins, K., Hicks, D., Melkers, J., Xiao, F., & Pinheiro, D. (2012). A boosted-trees method for name disambiguation. Scientometrics, 93(2), 391\u2013411.","journal-title":"Scientometrics"},{"key":"2865_CR37","doi-asserted-by":"crossref","unstructured":"Wang, X., Tang, J., Cheng, H., & Yu, P. S. (2011). ADANA: Active name disambiguation. Paper presented at the 2011 IEEE 11th international conference on data mining.","DOI":"10.1109\/ICDM.2011.19"},{"issue":"06","key":"2865_CR38","doi-asserted-by":"publisher","first-page":"1417","DOI":"10.1142\/S0218001493000698","volume":"7","author":"KS Woods","year":"1993","unstructured":"Woods, K. S., Doss, C. C., Bowyer, K. W., Solka, J. L., Priebe, C. E., & Kegelmeyer, W. P., Jr. (1993). Comparative evaluation of pattern recognition techniques for detection of microcalcifications in mammography. International Journal of Pattern Recognition and Artificial Intelligence, 7(06), 1417\u20131436.","journal-title":"International Journal of Pattern Recognition and Artificial Intelligence"}],"container-title":["Scientometrics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11192-018-2865-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-018-2865-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-018-2865-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,16]],"date-time":"2020-05-16T21:21:42Z","timestamp":1589664102000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11192-018-2865-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,7,27]]},"references-count":39,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,10]]}},"alternative-id":["2865"],"URL":"https:\/\/doi.org\/10.1007\/s11192-018-2865-9","relation":{},"ISSN":["0138-9130","1588-2861"],"issn-type":[{"value":"0138-9130","type":"print"},{"value":"1588-2861","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,7,27]]},"assertion":[{"value":"29 March 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 July 2018","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}