{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,9]],"date-time":"2025-06-09T22:26:29Z","timestamp":1749507989988,"version":"3.28.0"},"reference-count":18,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,12]]},"DOI":"10.1109\/bigdata.2016.7840598","type":"proceedings-article","created":{"date-parts":[[2017,2,7]],"date-time":"2017-02-07T21:46:59Z","timestamp":1486504019000},"page":"134-141","source":"Crossref","is-referenced-by-count":5,"title":["A theoretical model for n-gram distribution in big data corpora"],"prefix":"10.1109","author":[{"given":"Joaquim F.","family":"Silva","sequence":"first","affiliation":[]},{"given":"Carlos","family":"Goncalves","sequence":"additional","affiliation":[]},{"given":"Jose C.","family":"Cunha","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-4571(2000)51:1<69::AID-ASI10>3.0.CO;2-C"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2009.12.001"},{"key":"ref12","article-title":"One-pass, one-hash n-gram statistics estimation","volume":"abs cs 610010","author":"lemire","year":"2006","journal-title":"CoRR"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-4571(199210)43:9<616::AID-ASI4>3.0.CO;2-A"},{"article-title":"Handbook of the Poisson Distribution","year":"1967","author":"haight","key":"ref14"},{"key":"ref15","article-title":"Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables","author":"abramowitz","year":"1972","journal-title":"10th printing"},{"journal-title":"Wikipedia (2016 February) Wikimedia downloads","year":"0","key":"ref16"},{"key":"ref17","first-page":"369","article-title":"A local maxima method and a fair dispersion normalization for extracting multi word units","author":"silva","year":"1999","journal-title":"Proceedings of the 6th Meeting on the Mathematics of Language"},{"key":"ref18","article-title":"An n-gram cache for large-scale parallel extraction of multi word relevant expressions with LocalMaxs","author":"gon\u00e7alves","year":"2016","journal-title":"To appear in 12th IEEE International Conference on eScience"},{"article-title":"Human Behavior and the Principle of Least-Effort","year":"1949","author":"zipf","key":"ref4"},{"key":"ref3","first-page":"406","article-title":"On sampling from a lognormal model of word frequency distribution","volume":"16","author":"carroll","year":"1967","journal-title":"Computational Analysis of Present-Day American English"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3758\/s13423-014-0585-6"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1080\/09296170902850358"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2004.03.006"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1002\/asi.20524"},{"key":"ref2","first-page":"190","article-title":"On the theory of word frequencies and on related markovian models of discourse","volume":"xii","author":"mandelbrot","year":"1953","journal-title":"Structure of Language and Its Mathematical Aspects"},{"article-title":"The Psychobiology of Language: An Introduction to Dynamic Philology","year":"1935","author":"zipf","key":"ref1"},{"key":"ref9","first-page":"347","article-title":"Zipf's law outside the middle range","author":"kornai","year":"1999","journal-title":"Proceedings of the Sixth Meeting on Mathematics of Language (MOL) University of Central Florida"}],"event":{"name":"2016 IEEE International Conference on Big Data (Big Data)","start":{"date-parts":[[2016,12,5]]},"location":"Washington DC,USA","end":{"date-parts":[[2016,12,8]]}},"container-title":["2016 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7818133\/7840573\/07840598.pdf?arnumber=7840598","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,12,13]],"date-time":"2017-12-13T19:32:33Z","timestamp":1513193553000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7840598\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,12]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/bigdata.2016.7840598","relation":{},"subject":[],"published":{"date-parts":[[2016,12]]}}}