{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T11:04:11Z","timestamp":1763809451189,"version":"3.41.0"},"reference-count":43,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2015,12,1]],"date-time":"2015-12-01T00:00:00Z","timestamp":1448928000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Journal of Visual Languages &amp; Computing"],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1016\/j.jvlc.2015.10.017","type":"journal-article","created":{"date-parts":[[2015,11,3]],"date-time":"2015-11-03T13:52:18Z","timestamp":1446558738000},"page":"130-138","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":29,"special_numbering":"PB","title":["A hadoop based platform for natural language processing of web pages and documents"],"prefix":"10.1016","volume":"31","author":[{"given":"Paolo","family":"Nesi","sequence":"first","affiliation":[]},{"given":"Gianni","family":"Pantaleo","sequence":"additional","affiliation":[]},{"given":"Gianmarco","family":"Sanesi","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.jvlc.2015.10.017_bib1","unstructured":"V. Turner, J.F. Gantz, D. Reinsel, S. Minton, The Digital Universe of Opportunities: Rich Data and the Increasing Value of the Internet of Things, IDC White Paper, 2014"},{"issue":"9","key":"10.1016\/j.jvlc.2015.10.017_bib2","first-page":"8040","article-title":"A Concise Survey on Text Data Mining","volume":"3","author":"Monali","year":"2014","journal-title":"Int. J. Adv. Res. Comput. Commun. Eng."},{"key":"10.1016\/j.jvlc.2015.10.017_bib3","article-title":"Data-Intensive text processing with MapReduce","volume":"177","author":"Lin","year":"2010"},{"issue":"2","key":"10.1016\/j.jvlc.2015.10.017_bib4","doi-asserted-by":"crossref","first-page":"285","DOI":"10.4208\/cicp.110113.010813a","article-title":"A Survey on parallel computing and its applications in data-parallel problems using GPU architectures","volume":"15","author":"Navarro","year":"2014","journal-title":"Commun. Comput. Phys."},{"key":"10.1016\/j.jvlc.2015.10.017_bib5","unstructured":"P. Jindal, D. Roth, L.V Kale,\u00a0Efficient development of parallel NLP applications, Tech. Report of IDEALS (Illinois Digital Environment for Access to Learning and Scholarship), 2013."},{"key":"10.1016\/j.jvlc.2015.10.017_bib6","doi-asserted-by":"crossref","unstructured":"S. Orlando, R. Perego and F. Silvestri, Design of a Parallel and Distributed WEB Search Engine, In: Proceedings of the Parallel Computing (ParCo) Conference, Imperial College Press, September, 2001","DOI":"10.1142\/9781860949630_0025"},{"key":"10.1016\/j.jvlc.2015.10.017_bib7","doi-asserted-by":"crossref","unstructured":"O. Kononenko, O. Baysal, R. Holmes, M.W. Godfrey, Mining modern repositories with elasticsearch, In: Proceedings of the 11th Working Conference on Mining Software Repositories, 2014, pp. 328\u2013331.","DOI":"10.1145\/2597073.2597091"},{"key":"10.1016\/j.jvlc.2015.10.017_bib8","doi-asserted-by":"crossref","unstructured":"S. Ghemawat, H. Gobioff, S.T. Leung, The google file system, In: Proceeedings of the 19th ACM Symposium on Operating Systems Principles, Lake George, NY, October, 2003.","DOI":"10.1145\/945445.945450"},{"key":"10.1016\/j.jvlc.2015.10.017_bib9","doi-asserted-by":"crossref","first-page":"689","DOI":"10.1016\/j.chb.2013.07.043","article-title":"Text classification using a few labeled examples","volume":"30","author":"Colace","year":"2014","journal-title":"Comput. Hum. Behav."},{"issue":"4","key":"10.1016\/j.jvlc.2015.10.017_bib10","first-page":"164","article-title":"Text summarization extraction system (TSES) using extracted keywords","volume":"1","author":"Al-Hashemi","year":"2010","journal-title":"Int. Arab J. e-Technol."},{"issue":"1","key":"10.1016\/j.jvlc.2015.10.017_bib11","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1016\/j.ipm.2014.07.004","article-title":"Weighted word pairs for query expansion","volume":"51","author":"Colace","year":"2015","journal-title":"Inf. Process. Manag."},{"issue":"3\u20134","key":"10.1016\/j.jvlc.2015.10.017_bib12","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1017\/S135132490400350X","article-title":"International standard for a linguistic annotation framework","volume":"10","author":"Ide.","year":"2004","journal-title":"Nat. Lang. Eng."},{"key":"10.1016\/j.jvlc.2015.10.017_bib13","doi-asserted-by":"crossref","unstructured":"A. Hulth, Improved automatic keyword extraction given more linguistic knowledge, In: Proceedings of the 2003 Conference on Emprical Methods in Natural Language Processing, Sapporo, Japan, 2003.","DOI":"10.3115\/1119355.1119383"},{"key":"10.1016\/j.jvlc.2015.10.017_bib14","unstructured":"T. Luis, Parallelization of Natural Language Processing Algorithms on Distributed Systems (master thesis), Information Systems and Computer Engineering, Instituto Superior T\u00e9cnico, Univ. T\u00e9cncica de Lisboa, 2008"},{"key":"10.1016\/j.jvlc.2015.10.017_bib15","unstructured":"T. Hamon, J. Deriviere, Nazarenko, Ogmios: a scalable NLP platform for annotating large web document collections, In: Proceedings of the Corpus Linguistics, Birmingham, United Kingdom, 2007."},{"key":"10.1016\/j.jvlc.2015.10.017_bib16","doi-asserted-by":"crossref","unstructured":"P. Nesi, G. Pantaleo, G. Sanesi, A distributed framework for NLP-based keyword and keyphrase extraction from web pages and documents, In: Proceedings of 21st International Conference on Distibuted Multimedia Systems (DMS2015), 2015.","DOI":"10.18293\/DMS2015-024"},{"key":"10.1016\/j.jvlc.2015.10.017_bib17","unstructured":"H. Cunningham, D. Maynard, K. Bontcheva, V. Tablan, GATE: a framework and graphical development enviroment for robust NLP tools and applications, In: Proceedings of the 40th Anniversary Meeting of the Association for Computational Linguistics, ACL \u201802, Philadelphia, 2002."},{"key":"10.1016\/j.jvlc.2015.10.017_bib18","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1023\/A:1009976227802","article-title":"Learning algorithms for keyphrase extraction","volume":"2","author":"Turney","year":"2000","journal-title":"Inf. Retr."},{"key":"10.1016\/j.jvlc.2015.10.017_bib19","article-title":"Automatic keyword extraction from documents using conditional random fields","author":"Zhang","year":"2008","journal-title":"J. Comput. Inf. Syst."},{"key":"10.1016\/j.jvlc.2015.10.017_bib20","doi-asserted-by":"crossref","DOI":"10.1142\/S0218213004001466","article-title":"Keyword extraction from a single document using word co-ocuurrence statistical information","author":"Matsuo","year":"2004","journal-title":"Int. J. Artif. Intell. Tools"},{"key":"10.1016\/j.jvlc.2015.10.017_bib21","doi-asserted-by":"crossref","unstructured":"A. Azcarraga, M. David Liu, R. Setiono, Keyword extraction using backpropagation neural networks and rule extraction, In: Proceedings of IEEE World Congress on Computational Intelligence (WCCI), Brisbane, Australia, June, 2012.","DOI":"10.1109\/IJCNN.2012.6252618"},{"issue":"2","key":"10.1016\/j.jvlc.2015.10.017_bib22","article-title":"Keyword and keyphrase extraction techniques: a literature review","volume":"109","author":"Siddiqi","year":"2015","journal-title":"Int. J. Comput. Appl."},{"issue":"6","key":"10.1016\/j.jvlc.2015.10.017_bib23","first-page":"144","article-title":"Effective approaches for extraction of keywords","volume":"7","author":"Kaur","year":"2010","journal-title":"Int. J. Comput. Sci. Issues"},{"key":"10.1016\/j.jvlc.2015.10.017_bib24","doi-asserted-by":"crossref","unstructured":"I. Witten, G. Paynte, E. Frank, C. Gutwin, C. Nevill-Manning, KEA: practical automatic keyphrase extraction, In: Proceedings of the 4th ACM Conference on Digital Library, 1999.","DOI":"10.1145\/313238.313437"},{"issue":"10","key":"10.1016\/j.jvlc.2015.10.017_bib25","first-page":"1471","article-title":"Machine learning-based keywords extraction for scientific literature","volume":"13","author":"Wu","year":"2007","journal-title":"J. Univ. Comput. Sci."},{"key":"10.1016\/j.jvlc.2015.10.017_bib26","doi-asserted-by":"crossref","unstructured":"Z. Liu, P. Li, Y. Zheng, M. Sun, Clustering to find exemplar terms for keyphrase extraction, In: Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing, 2009, pp. 257\u2013266.","DOI":"10.3115\/1699510.1699544"},{"key":"10.1016\/j.jvlc.2015.10.017_bib27","doi-asserted-by":"crossref","unstructured":"F. Liu, D. Pennell, F. Liu, Yang Liu, Unsupervised approaches for automatic keyword extraction using meeting transcripts, In: Proceedings of the Human Language Technologies: The Annual Conference of the North American Chapter of the Association for Computational Linguistics,\u00a02009, pp. 620\u2013628.","DOI":"10.3115\/1620754.1620845"},{"key":"10.1016\/j.jvlc.2015.10.017_bib28","doi-asserted-by":"crossref","unstructured":"O. Medelyan, E. Frank, I.H. Witten, Human-competitive tagging using automatic keyphrase extraction, In: Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing, 2009, pp. 1318\u20131327.","DOI":"10.3115\/1699648.1699678"},{"key":"10.1016\/j.jvlc.2015.10.017_bib29","doi-asserted-by":"crossref","unstructured":"K.S. Hasan, V. Ng, Automatic keyphrase extraction: a survey of the state of the art, In: Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics, vol. 1, 2014, pp. 1262\u20131273.","DOI":"10.3115\/v1\/P14-1119"},{"issue":"Issue 2","key":"10.1016\/j.jvlc.2015.10.017_bib30","first-page":"16","article-title":"A new approach to keyphrase extraction using neural networks","volume":"vol. 7","author":"Sarkar","year":"2010","journal-title":"Int. J. Comput. Sci. Issues"},{"key":"10.1016\/j.jvlc.2015.10.017_bib31","doi-asserted-by":"crossref","unstructured":"M. Grineva, M. Grinev, D. Lizorkin, Extracting key terms from noisy and multitheme documents, In: Proceedings of the 18th International Conference on World Wide Web, 2009, pp. 661\u201367.","DOI":"10.1145\/1526709.1526798"},{"key":"10.1016\/j.jvlc.2015.10.017_bib32","unstructured":"Z. Liu, W. Huang, Y. Zheng, M. Sun, Automatic keyphrase extraction via topic decomposition, In: Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing, 2010, pp. 366\u2013376."},{"issue":"3","key":"10.1016\/j.jvlc.2015.10.017_bib33","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1109\/69.390246","article-title":"Parallel natural language processing on a semantic network array processor","volume":"vol. 7","author":"Chung","year":"1995","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.jvlc.2015.10.017_bib34","doi-asserted-by":"crossref","unstructured":"M.P. van Lohuizen, Parallel processing of natural language parsers, In: Proceedings of the 15th Conference of Parallel Computing, 2000, pp. 17\u201320.","DOI":"10.1142\/9781848160170_0019"},{"key":"10.1016\/j.jvlc.2015.10.017_bib35","unstructured":"N. Rizzolo, D. Roth, Learning based java for rapid development of NLP systems, In: Proceedings of the International Conference on Language Resources and Evaluation (LREC), 2010."},{"key":"10.1016\/j.jvlc.2015.10.017_bib36","series-title":"Advanced Computational Infrastructures for Parallel and Distributed Applications","first-page":"265","article-title":"Charm++ and AMPI: adaptive runtime strategies via migratable objects","author":"Kale","year":"2009"},{"key":"10.1016\/j.jvlc.2015.10.017_bib37","doi-asserted-by":"crossref","unstructured":"P. Exner, P. Nugues, KOSHIK-A large-scale distributed computing framework for NLP, In: Proceedings of the International Conference on Pattern Recognition Applications and Methods (ICPRAM 2014), 2014, pp. 463\u2013470.","DOI":"10.5220\/0004707704630470"},{"key":"10.1016\/j.jvlc.2015.10.017_bib38","article-title":"GATECloud.net: a platform for large-scale, open-source text processing on the cloud","volume":"37","author":"Tablan","year":"2013","journal-title":"Philos. Trans. R. Soc."},{"key":"10.1016\/j.jvlc.2015.10.017_bib39","unstructured":"M. Zaharia, M. Chowdhury, M.J. Franklin, S. Shenker, I. Stoica, Spark: Cluster Computing with Working Sets, Technology Report of UC Berkeley, 2011"},{"key":"10.1016\/j.jvlc.2015.10.017_bib40","unstructured":"M. Zaharia, M. Chowdhury, T. Das, A. Dave, J. Ma, M. McCauly, M.J. Franklin, S. Shenker, I. Stoica, Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing, In: Proceedings of the 9th USENIX Symposium on Networked Systems Design and Implementation (NSDI 12), 2012, pp. 15\u201328."},{"issue":"1","key":"10.1016\/j.jvlc.2015.10.017_bib41","first-page":"8","article-title":"Comparing apache spark and map reduce with performance analysis using K-means","volume":"113","author":"Gopalani","year":"2015","journal-title":"Int. J. Comput. Appl."},{"year":"2012","series-title":"Hadoop, the Definitive Guide","author":"White","key":"10.1016\/j.jvlc.2015.10.017_bib42"},{"year":"2012","series-title":"Hadoop in Practice","key":"10.1016\/j.jvlc.2015.10.017_bib43"}],"container-title":["Journal of Visual Languages &amp; Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1045926X15000749?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1045926X15000749?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T08:31:00Z","timestamp":1748680260000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1045926X15000749"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,12]]},"references-count":43,"alternative-id":["S1045926X15000749"],"URL":"https:\/\/doi.org\/10.1016\/j.jvlc.2015.10.017","relation":{},"ISSN":["1045-926X"],"issn-type":[{"type":"print","value":"1045-926X"}],"subject":[],"published":{"date-parts":[[2015,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A hadoop based platform for natural language processing of web pages and documents","name":"articletitle","label":"Article Title"},{"value":"Journal of Visual Languages & Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.jvlc.2015.10.017","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"Copyright \u00a9 2015 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}]}}