{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T22:28:36Z","timestamp":1775687316375,"version":"3.50.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2017,8,10]],"date-time":"2017-08-10T00:00:00Z","timestamp":1502323200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2017,11]]},"DOI":"10.1007\/s10994-016-5620-6","type":"journal-article","created":{"date-parts":[[2017,8,10]],"date-time":"2017-08-10T19:02:36Z","timestamp":1502391756000},"page":"1771-1785","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Asymptotic properties of Turing\u2019s formula in relative error"],"prefix":"10.1007","volume":"106","author":[{"given":"Michael","family":"Grabchak","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyi","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,8,10]]},"reference":[{"key":"5620_CR1","volume-title":"Handbook of mathematical functions","author":"M Abramowitz","year":"1972","unstructured":"Abramowitz, M., & Stegun, I. A. (1972). Handbook of mathematical functions (10th ed.). New York: Dover Publications.","edition":"10"},{"issue":"1","key":"5620_CR2","doi-asserted-by":"crossref","first-page":"249","DOI":"10.3150\/15-BEJ743","volume":"23","author":"A Ben-Hamou","year":"2017","unstructured":"Ben-Hamou, A., Boucheron, S., & Ohannessian, M. I. (2017). Concentration inequalities in the infinite urn scheme for occupancy counts and the missing mass, with applications. Bernoulli, 23(1), 249\u2013287.","journal-title":"Bernoulli"},{"issue":"3","key":"5620_CR3","first-page":"1","volume":"18","author":"D Berend","year":"2013","unstructured":"Berend, D., & Kontorovich, A. (2013). On the concentration of the missing mass. Electronic Communications in Probability, 18(3), 1\u20137.","journal-title":"Electronic Communications in Probability"},{"key":"5620_CR4","volume-title":"Regular variation. Encyclopedia of mathematics and its applications","author":"NH Bingham","year":"1987","unstructured":"Bingham, N. H., Goldie, C. M., & Teugels, J. L. (1987). Regular variation. Encyclopedia of mathematics and its applications. Cambridge: Cambridge University Press."},{"issue":"6","key":"5620_CR5","doi-asserted-by":"crossref","first-page":"1339","DOI":"10.1214\/aos\/1176345651","volume":"9","author":"A Chao","year":"1981","unstructured":"Chao, A. (1981). On estimating the probability of discovering a new species. The Annals of Statistics, 9(6), 1339\u20131342.","journal-title":"The Annals of Statistics"},{"issue":"5","key":"5620_CR6","doi-asserted-by":"crossref","first-page":"1189","DOI":"10.1890\/14-0550.1","volume":"96","author":"A Chao","year":"2015","unstructured":"Chao, A., Hsieh, T. C., Chazdon, R. L., Colwell, R. K., & Gotelli, N. J. (2015). Unveiling the species-rank abundance distribution by generalizing the Good\u2013Turing sample coverage theory. Ecology, 96(5), 1189\u20131201.","journal-title":"Ecology"},{"issue":"4","key":"5620_CR7","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1006\/csla.1999.0128","volume":"13","author":"SF Chen","year":"1999","unstructured":"Chen, S. F., & Goodman, J. (1999). An empirical study of smoothing techniques for language modeling. Computer Speech and Language, 13(4), 359\u2013394.","journal-title":"Computer Speech and Language"},{"issue":"4","key":"5620_CR8","doi-asserted-by":"crossref","first-page":"623","DOI":"10.1007\/BF02481141","volume":"42","author":"A Cohen","year":"1990","unstructured":"Cohen, A., & Sackrowitz, H. B. (1990). Admissibility of estimators of the probability of unobserved outcomes. Annals of the Institute of Statistical Mathematics, 42(4), 623\u2013636.","journal-title":"Annals of the Institute of Statistical Mathematics"},{"key":"5620_CR9","unstructured":"Decrouez, G., Grabchak, M., & Paris, Q. (2016). Finite sample properties of the mean occupancy counts and probabilities. Bernoulli (to appear). \n                        arXiv:1601.06537v2\n                        \n                    ."},{"issue":"2","key":"5620_CR10","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1002\/(SICI)1098-2418(199809)13:2<99::AID-RSA1>3.0.CO;2-M","volume":"13","author":"D Dubhashi","year":"1998","unstructured":"Dubhashi, D., & Ranjan, D. (1998). Balls and bins: A study in negative dependence. Random Structures and Algorithms, 13(2), 99\u2013124.","journal-title":"Random Structures and Algorithms"},{"issue":"3","key":"5620_CR11","first-page":"435","volume":"63","author":"B Efron","year":"1976","unstructured":"Efron, B., & Thisted, R. (1976). Estimating the number of unseen species: How many words did Shakespeare know? Biometrika, 63(3), 435\u2013447.","journal-title":"Biometrika"},{"issue":"3","key":"5620_CR12","doi-asserted-by":"crossref","first-page":"905","DOI":"10.1214\/aos\/1176346256","volume":"11","author":"WW Esty","year":"1983","unstructured":"Esty, W. W. (1983). A normal limit law for a nonparametric estimator of the coverage of a random sample. Annals of Statistics, 11(3), 905\u2013912.","journal-title":"Annals of Statistics"},{"issue":"1","key":"5620_CR13","doi-asserted-by":"crossref","first-page":"136","DOI":"10.1111\/biom.12366","volume":"72","author":"S Favaro","year":"2016","unstructured":"Favaro, S., Nipoti, B., & Teh, Y. W. (2016). Rediscovery of Good\u2013Turing estimators via Bayesian nonparametrics. Biometrics, 72(1), 136\u2013145.","journal-title":"Biometrics"},{"key":"5620_CR14","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1214\/07-PS092","volume":"4","author":"A Gnedin","year":"2007","unstructured":"Gnedin, A., Hansen, B., & Pitman, J. (2007). Notes on the occupancy problem with infinitely many boxes: General asymptotics and power laws. Probability Surveys, 4, 146\u2013171.","journal-title":"Probability Surveys"},{"issue":"3\/4","key":"5620_CR15","doi-asserted-by":"crossref","first-page":"237","DOI":"10.2307\/2333344","volume":"40","author":"IJ Good","year":"1953","unstructured":"Good, I. J. (1953). The population frequencies of species and the estimation of population parameters. Biometrika, 40(3\/4), 237\u2013264.","journal-title":"Biometrika"},{"issue":"1\u20132","key":"5620_CR16","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1093\/biomet\/43.1-2.45","volume":"43","author":"IJ Good","year":"1956","unstructured":"Good, I. J., & Toulmin, G. H. (1956). The number of new species, and the increase in population coverage, when a sample is increased. Biometrika, 43(1\u20132), 45\u201363.","journal-title":"Biometrika"},{"issue":"6","key":"5620_CR17","doi-asserted-by":"crossref","first-page":"4199","DOI":"10.1080\/03610918.2015.1109658","volume":"46","author":"M Grabchak","year":"2017","unstructured":"Grabchak, M., & Cosme, V. (2017). On the performance of Turing\u2019s formula: A simulation study. Communication in Statistics: Simulation and Computation, 46(6), 4199\u20134209.","journal-title":"Communication in Statistics: Simulation and Computation"},{"issue":"4","key":"5620_CR18","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1016\/0885-2308(92)90027-2","volume":"6","author":"V Gupta","year":"1992","unstructured":"Gupta, V., Lennig, M., & Mermelstein, P. (1992). A language model for very large-vocabulary speech recognition. Computer Speech and Language, 6(4), 331\u2013344.","journal-title":"Computer Speech and Language"},{"key":"5620_CR19","first-page":"373","volume":"17","author":"S Karlin","year":"1967","unstructured":"Karlin, S. (1967). Central limit theorems for certain infinite urn schemes. Journal of Mathematical Mechanics, 17, 373\u2013401.","journal-title":"Journal of Mathematical Mechanics"},{"issue":"2","key":"5620_CR20","doi-asserted-by":"crossref","first-page":"422","DOI":"10.1093\/biomet\/55.2.422","volume":"55","author":"CL Mallows","year":"1968","unstructured":"Mallows, C. L. (1968). An inequality involving multinomial probabilities. Biometrika, 55(2), 422\u2013424.","journal-title":"Biometrika"},{"issue":"3","key":"5620_CR21","doi-asserted-by":"crossref","first-page":"669","DOI":"10.1093\/biomet\/89.3.669","volume":"89","author":"CX Mao","year":"2002","unstructured":"Mao, C. X., & Lindsay, B. G. (2002). A Poisson model for the coverage problem with a genomic application. Biometrika, 89(3), 669\u2013681.","journal-title":"Biometrika"},{"key":"5620_CR22","unstructured":"McAllester, D. A., & Schapire, R. E. (2000). On the convergence rate of Good\u2013Turing estimators. In COLT \u201900: Proceedings of the thirteenth annual conference on computational learning theory (pp. 1\u20136)."},{"issue":"Oct","key":"5620_CR23","first-page":"895","volume":"4","author":"DA McAllester","year":"2003","unstructured":"McAllester, D. A., & Ortiz, L. E. (2003). Concentration inequalities for the missing mass and for histogram rule error. Journal of Machine Learning Research, 4(Oct), 895\u2013911.","journal-title":"Journal of Machine Learning Research"},{"key":"5620_CR24","unstructured":"Mossel, E., & Ohannessian, M. I. (2015). On the impossibility of learning the missing mass. \n                        arXiv:1503.03613v1\n                        \n                    ."},{"key":"5620_CR25","unstructured":"Ohannessian, M. I., & Dahleh, M. A. (2012). Rare probability estimation under regularly varying heavy tails. In JMLR workshop and conference proceedings (Vol. 23, pp. 21.1\u201321.24)."},{"key":"5620_CR26","volume-title":"Heavy-tail phenomena: Probabilistic and statistical modeling","author":"SI Resnick","year":"2007","unstructured":"Resnick, S. I. (2007). Heavy-tail phenomena: Probabilistic and statistical modeling. New York: Springer."},{"issue":"1","key":"5620_CR27","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1214\/aoms\/1177698526","volume":"39","author":"HE Robbins","year":"1968","unstructured":"Robbins, H. E. (1968). Estimating the total probability of the unobserved outcomes of an experiment. Annals of Mathematical Statistics, 39(1), 256\u2013257.","journal-title":"Annals of Mathematical Statistics"},{"issue":"3","key":"5620_CR28","doi-asserted-by":"crossref","first-page":"445","DOI":"10.1093\/biomet\/74.3.445","volume":"74","author":"R Thisted","year":"1987","unstructured":"Thisted, R., & Efron, B. (1987). Did Shakespeare write a newly discovered poem. Biometrika, 74(3), 445\u2013455.","journal-title":"Biometrika"},{"issue":"5","key":"5620_CR29","doi-asserted-by":"crossref","first-page":"2022","DOI":"10.1214\/009053605000000390","volume":"33","author":"CH Zhang","year":"2005","unstructured":"Zhang, C. H. (2005). Estimation of sums of random variables: Examples and information bounds. The Annals of Statistics, 33(5), 2022\u20132041.","journal-title":"The Annals of Statistics"},{"issue":"5A","key":"5620_CR30","doi-asserted-by":"crossref","first-page":"2582","DOI":"10.1214\/08-AOS658","volume":"37","author":"CH Zhang","year":"2009","unstructured":"Zhang, C. H., & Zhang, Z. (2009). Asymptotic normality of a nonparametric estimator of sample coverage. Annals of Statistics, 37(5A), 2582\u20132595.","journal-title":"Annals of Statistics"},{"issue":"1","key":"5620_CR31","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1007\/s13171-012-0019-3","volume":"75","author":"Z Zhang","year":"2013","unstructured":"Zhang, Z. (2013). A multivariate normal law for Turing\u2019s formulae. Sankhya A, 75(1), 51\u201373.","journal-title":"Sankhya A"},{"issue":"2\u20133","key":"5620_CR32","doi-asserted-by":"crossref","first-page":"222","DOI":"10.1080\/09296170701514189","volume":"14","author":"Z Zhang","year":"2007","unstructured":"Zhang, Z., & Huang, H. (2007). Turing\u2019s formula revisited. Journal of Quantitative Linguistics, 14(2\u20133), 222\u2013241.","journal-title":"Journal of Quantitative Linguistics"},{"issue":"5","key":"5620_CR33","doi-asserted-by":"crossref","first-page":"431","DOI":"10.1080\/10485250802172126","volume":"20","author":"Z Zhang","year":"2008","unstructured":"Zhang, Z., & Huang, H. (2008). A sufficient normality condition for Turing\u2019s formula. Journal of Nonparametric Statistics, 20(5), 431\u2013446.","journal-title":"Journal of Nonparametric Statistics"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-016-5620-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-016-5620-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-016-5620-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,10,4]],"date-time":"2017-10-04T08:55:19Z","timestamp":1507107319000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-016-5620-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,8,10]]},"references-count":33,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2017,11]]}},"alternative-id":["5620"],"URL":"https:\/\/doi.org\/10.1007\/s10994-016-5620-6","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,8,10]]}}}