{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,9]],"date-time":"2025-06-09T22:40:10Z","timestamp":1749508810546,"version":"3.41.0"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030779603"},{"type":"electronic","value":"9783030779610"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-77961-0_55","type":"book-chapter","created":{"date-parts":[[2021,6,10]],"date-time":"2021-06-10T19:07:58Z","timestamp":1623352078000},"page":"699-706","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Model for Predicting n-gram Frequency Distribution in Large Corpora"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5223-1180","authenticated-orcid":false,"given":"Joaquim F.","family":"Silva","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6729-8348","authenticated-orcid":false,"given":"Jose C.","family":"Cunha","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,9]]},"reference":[{"key":"55_CR1","doi-asserted-by":"crossref","unstructured":"Ausloos, M., Cerqueti, R.: A universal rank-size law. PLoS ONE 11(11) (2016)","DOI":"10.1371\/journal.pone.0166011"},{"key":"55_CR2","first-page":"1","volume":"4","author":"VK Balasubrahmanyan","year":"2002","unstructured":"Balasubrahmanyan, V.K., Naranan, S.: Algorithmic information, complexity and zipf\u2019s law. Glottometrics 4, 1\u201326 (2002)","journal-title":"Glottometrics"},{"issue":"12","key":"55_CR3","doi-asserted-by":"publisher","first-page":"123015","DOI":"10.1088\/1367-2630\/11\/12\/123015","volume":"11","author":"S Bernhardsson","year":"2009","unstructured":"Bernhardsson, S., da Rocha, L.E.C., Minnhagen, P.: The meta book and size-dependent properties of written language. New J. Phys. 11(12), 123015 (2009)","journal-title":"New J. Phys."},{"issue":"4","key":"55_CR4","doi-asserted-by":"publisher","first-page":"386","DOI":"10.1016\/S0019-9958(67)90201-X","volume":"10","author":"AD Booth","year":"1967","unstructured":"Booth, A.D.: A law of occurrences for words of low frequency. Inf. Control 10(4), 386\u2013393 (1967)","journal-title":"Inf. Control"},{"key":"55_CR5","unstructured":"Brants, T., Popat, A.C., Xu, P., Och, F.J., Dean, J.: Large language models in machine translation. In: Joint Conference on Empirical Methods in NLP and Computational Natural Language Learning, pp. 858\u2013867. ACL (2007)"},{"issue":"3","key":"55_CR6","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1076\/jqul.8.3.165.4101","volume":"8","author":"RF Cancho","year":"2001","unstructured":"Cancho, R.F., Sol\u00e9, R.V.: Two regimes in the frequency of words and the origins of complex lexicons: Zipf\u2019s law revisited*. J. Quant. Linguist. 8(3), 165\u2013173 (2001)","journal-title":"J. Quant. Linguist."},{"key":"55_CR7","doi-asserted-by":"crossref","unstructured":"Dias, G.: Multiword unit hybrid extraction. In: ACL Workshop on Multiword Expressions, vol. 18, pp. 41\u201348. ACL (2003)","DOI":"10.3115\/1119282.1119288"},{"key":"55_CR8","first-page":"021006","volume":"3","author":"M Gerlach","year":"2013","unstructured":"Gerlach, M., Altmann, E.G.: Stochastic model for the vocabulary growth in natural languages. Phys. Rev. X 3, 021006 (2013)","journal-title":"Phys. Rev. X"},{"key":"55_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1007\/978-3-030-22741-8_6","volume-title":"Computational Science \u2013 ICCS 2019","author":"C Goncalves","year":"2019","unstructured":"Goncalves, C., Silva, J.F., Cunha, J.C.: n-gram cache performance in statistical extraction of relevant terms in large Corpora. In: Rodrigues, J.M.F., et al. (eds.) ICCS 2019. LNCS, vol. 11537, pp. 75\u201388. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-22741-8_6"},{"key":"55_CR10","volume-title":"Handbook of the Poisson Distribution","author":"FA Haight","year":"1967","unstructured":"Haight, F.A.: Handbook of the Poisson Distribution. John Wiley & Sons, New York (1967)"},{"issue":"1082","key":"55_CR11","first-page":"1","volume":"3","author":"L L\u00fc","year":"2013","unstructured":"L\u00fc, L., Zhang, Z.K., Zhou, T.: Deviation of zipf\u2019s and heaps\u2019 laws in human languages with limited dictionary sizes. Sci. Rep. 3(1082), 1\u20137 (2013)","journal-title":"Sci. Rep."},{"key":"55_CR12","unstructured":"Mandelbrot, B.: On the theory of word frequencies and on related Markovian models of discourse. In: Structural of Language and its Mathematical Aspects (1953)"},{"issue":"2","key":"55_CR13","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1080\/15427951.2004.10129088","volume":"1","author":"M Mitzenmacher","year":"2003","unstructured":"Mitzenmacher, M.: A brief history of generative models for power law and lognormal distributions. Internet Math. 1(2), 226\u2013251 (2003)","journal-title":"Internet Math."},{"key":"55_CR14","doi-asserted-by":"publisher","first-page":"1112","DOI":"10.3758\/s13423-014-0585-6","volume":"21","author":"ST Piantadosi","year":"2014","unstructured":"Piantadosi, S.T.: Zipf\u2019s word frequency law in natural language: a critical review and future directions. Psychonomic Bull. Rev. 21, 1112\u20131130 (2014)","journal-title":"Psychonomic Bull. Rev."},{"key":"55_CR15","doi-asserted-by":"crossref","unstructured":"Silva, J., Mexia, J., Coelho, A., Lopes, G.: Document clustering and cluster topic extraction in multilingual corpora. In: Proceedings 2001 IEEE International Conference on Data Mining, pp. 513\u2013520 (2001)","DOI":"10.1109\/ICDM.2001.989559"},{"key":"55_CR16","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"840","DOI":"10.1007\/978-3-030-47436-2_63","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"JF Silva","year":"2020","unstructured":"Silva, J.F., Cunha, J.C.: An empirical model for n-gram frequency distribution in large corpora. In: Lauw, H.W., et al. (eds.) PAKDD 2020. LNCS (LNAI), vol. 12085, pp. 840\u2013851. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-47436-2_63"},{"key":"55_CR17","doi-asserted-by":"crossref","unstructured":"Silva, J.F., Gon\u00e7alves, C., Cunha, J.C.: A theoretical model for n-gram distribution in big data corpora. In: 2016 IEEE International Conference on Big Data, pp. 134\u2013141 (2016)","DOI":"10.1109\/BigData.2016.7840598"},{"key":"55_CR18","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1007\/3-540-48159-1_9","volume-title":"Progress in Artificial Intelligence","author":"JF da Silva","year":"1999","unstructured":"da Silva, J.F., Dias, G., Guillor\u00e9, S., Pereira Lopes, J.G.: Using LocalMaxs algorithm for the extraction of contiguous and non-contiguous multiword lexical units. In: Barahona, P., Alferes, J.J. (eds.) EPIA 1999. LNCS (LNAI), vol. 1695, pp. 113\u2013132. Springer, Heidelberg (1999). https:\/\/doi.org\/10.1007\/3-540-48159-1_9"},{"issue":"3\/4","key":"55_CR19","doi-asserted-by":"publisher","first-page":"425","DOI":"10.2307\/2333389","volume":"42","author":"H Simon","year":"1955","unstructured":"Simon, H.: On a class of skew distribution functions. Biometrika 42(3\/4), 425\u2013440 (1955)","journal-title":"Biometrika"},{"key":"55_CR20","volume-title":"Human Behavior and the Principle of Least-Effort","author":"GK Zipf","year":"1949","unstructured":"Zipf, G.K.: Human Behavior and the Principle of Least-Effort. Addison-Wesley, Cambridge (1949)"}],"container-title":["Lecture Notes in Computer Science","Computational Science \u2013 ICCS 2021"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-77961-0_55","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,9]],"date-time":"2025-06-09T22:05:27Z","timestamp":1749506727000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-77961-0_55"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030779603","9783030779610"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-77961-0_55","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"9 June 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICCS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Science","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Krakow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 June 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 June 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iccs-computsci2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iccs-meeting.org\/iccs2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"156","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"48","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"14","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.8","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"212 full and 43 short papers were selected from 479 submissions to the workshops\/ thematic tracks. The conference was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}