{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,4]],"date-time":"2025-09-04T14:08:43Z","timestamp":1756994923521,"version":"3.37.3"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"S1","license":[{"start":{"date-parts":[[2017,12,21]],"date-time":"2017-12-21T00:00:00Z","timestamp":1513814400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"National High Technology Research and Development 863 Program of China","award":["2015AA124102"],"award-info":[{"award-number":["2015AA124102"]}]},{"name":"Hebei Natural Science Foundation of China","award":["F2015203280"],"award-info":[{"award-number":["F2015203280"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cluster Comput"],"published-print":{"date-parts":[[2019,1]]},"DOI":"10.1007\/s10586-017-1498-8","type":"journal-article","created":{"date-parts":[[2017,12,21]],"date-time":"2017-12-21T12:36:30Z","timestamp":1513859790000},"page":"2383-2394","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["A distributed incremental information acquisition model for large-scale text data"],"prefix":"10.1007","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7311-0196","authenticated-orcid":false,"given":"Shengtao","family":"Sun","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jibing","family":"Gong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Albert Y.","family":"Zomaya","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aizhi","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,12,21]]},"reference":[{"issue":"2","key":"1498_CR1","doi-asserted-by":"publisher","first-page":"793","DOI":"10.1007\/s10586-016-0569-6","volume":"19","author":"L Wang","year":"2016","unstructured":"Wang, L., Song, W., Liu, P.: Link the remote sensing big data to the image features via wavelet transformation. Clust. Comput. 19(2), 793\u2013810 (2016)","journal-title":"Clust. Comput."},{"key":"1498_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00607-015-0471-8","volume":"98","author":"R Ranjan","year":"2016","unstructured":"Ranjan, R., Georgakopoulos, D., Wang, L.: A note on software tools and technologies for delivering smart media-optimized big data applications in the cloud. Computing 98, 1\u20135 (2016)","journal-title":"Computing"},{"issue":"3","key":"1498_CR3","doi-asserted-by":"publisher","first-page":"707","DOI":"10.1109\/TC.2013.2295806","volume":"64","author":"D Chen","year":"2015","unstructured":"Chen, D., Li, X., Wang, L., et al.: Fast and scalable multi-way analysis of massive neural data. IEEE Trans. Comput. 64(3), 707\u2013719 (2015)","journal-title":"IEEE Trans. Comput."},{"key":"1498_CR4","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1016\/j.future.2016.09.019","volume":"68","author":"Z Deng","year":"2017","unstructured":"Deng, Z., Han, W., Wang, L., et al.: An efficient online direction-preserving compression approach for trajectory streaming data. Fut. Gener. Comput. Syst. 68, 150\u2013162 (2017)","journal-title":"Fut. Gener. Comput. Syst."},{"key":"1498_CR5","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/j.future.2017.03.019","volume":"73","author":"J Li","year":"2017","unstructured":"Li, J., Zhang, P., Li, Y., et al.: A data-check based distributed storage model for storing hot temporary data. Fut. Gener. Comput. Syst. 73, 13\u201321 (2017)","journal-title":"Fut. Gener. Comput. Syst."},{"key":"1498_CR6","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1145\/1953122.1953148","volume":"54","author":"S Melnik","year":"2011","unstructured":"Melnik, S., Gubarev, A., Long, J.J., et al.: Dremel: interactive analysis of web-scale datasets. Commun. ACM 54, 114\u2013123 (2011)","journal-title":"Commun. ACM"},{"key":"1498_CR7","doi-asserted-by":"crossref","unstructured":"Voras, I., Zagar, M.: Adapting the Bloom filter to multithreaded environments. In: The 15th IEEE Mediterranean Electrotechnical Conference, Valletta, Malta, pp. 1488\u20131493 (2010)","DOI":"10.1109\/MELCON.2010.5476244"},{"issue":"8","key":"1498_CR8","doi-asserted-by":"publisher","first-page":"2126","DOI":"10.1109\/TPDS.2013.272","volume":"25","author":"Y Ma","year":"2014","unstructured":"Ma, Y., Wang, L., Zomaya, A.Y., et al.: Task-tree based large-scale mosaicking for massive remote sensed imageries with dynamic dag scheduling. IEEE Trans. Parallel Distrib. Syst. 25(8), 2126\u20132137 (2014)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"3","key":"1498_CR9","doi-asserted-by":"publisher","first-page":"1283","DOI":"10.1007\/s10586-016-0581-x","volume":"19","author":"Z Xu","year":"2016","unstructured":"Xu, Z., Mei, L., Hu, C., Liu, Y.: The big data analytics and applications of the surveillance system using video structured description technology. Clust. Comput. 19(3), 1283\u20131292 (2016)","journal-title":"Clust. Comput."},{"key":"1498_CR10","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1016\/j.ijhm.2014.10.013","volume":"44","author":"Z Xiang","year":"2015","unstructured":"Xiang, Z., Schwartz, Z., Gerdes Jr., J.H., Uysal, M.: What can big data and text analytics tell us about hotel guest experience and satisfaction? Int. J. Hosp. Manag. 44, 120\u2013130 (2015)","journal-title":"Int. J. Hosp. Manag."},{"issue":"3","key":"1498_CR11","doi-asserted-by":"publisher","first-page":"2363","DOI":"10.1007\/s10586-017-0811-x","volume":"20","author":"K Jensen","year":"2017","unstructured":"Jensen, K., Nguyen, H.T., Van Do, T., Arnes, A.: A big data analytics approach to combat telecommunication vulnerabilities. Clust. Comput. 20(3), 2363\u20132374 (2017)","journal-title":"Clust. Comput."},{"key":"1498_CR12","doi-asserted-by":"crossref","unstructured":"Ma, L., Zhang, Y.: Using Word2Vec to process big text data. In: IEEE International Conference on Big Data, Santa Clara, pp. 2895\u20132897 (2015)","DOI":"10.1109\/BigData.2015.7364114"},{"key":"1498_CR13","first-page":"167","volume-title":"Big Scale Text Analytics and Smart Content Navigation. Enabling Real-Time Business Intelligence, Lecture Notes in Business Information Processing","author":"K Schmidt","year":"2015","unstructured":"Schmidt, K., Bachle, S., Scholl, P., Nold, G.: Big Scale Text Analytics and Smart Content Navigation. Enabling Real-Time Business Intelligence, Lecture Notes in Business Information Processing, vol. 206, pp. 167\u2013170. Springer, Berlin (2015)"},{"issue":"3","key":"1498_CR14","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPDS.2014.2311811","volume":"26","author":"Z Deng","year":"2015","unstructured":"Deng, Z., Wu, X., Wang, L., et al.: Parallel processing of dynamic continuous qeries over streaming data flows. IEEE Trans. Parallel Distrib. Syst. 26(3), 834\u2013846 (2015)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"3","key":"1498_CR15","doi-asserted-by":"publisher","first-page":"847","DOI":"10.1109\/TPDS.2014.2311805","volume":"26","author":"D Chen","year":"2015","unstructured":"Chen, D., Wang, L., Zomaya, A.Y., et al.: Parallel simulation of complex evacuation scenarios with adaptive agent models. IEEE Trans. Parallel Distrib. Syst. 26(3), 847\u2013857 (2015)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"1\u20132","key":"1498_CR16","doi-asserted-by":"publisher","first-page":"4","DOI":"10.14778\/1920841.1920846","volume":"3","author":"J Cho","year":"2010","unstructured":"Cho, J., Garcia-Molina, H.: Dealing with web data: history and look ahead. Proc. VLDB Endow. 3(1\u20132), 4\u20134 (2010)","journal-title":"Proc. VLDB Endow."},{"issue":"1","key":"1498_CR17","doi-asserted-by":"publisher","first-page":"25","DOI":"10.4018\/jitwe.2011010103","volume":"6","author":"DK Sharma","year":"2011","unstructured":"Sharma, D.K., Sharma, A.K.: A novel architecture for deep web crawler. Int. J. Inf. Technol. Web Eng. 6(1), 25\u201348 (2011)","journal-title":"Int. J. Inf. Technol. Web Eng."},{"key":"1498_CR18","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Dong, G., Peng, Z., et al.: A framework for incremental deep web crawler based on URL classification. In: The International Conference on Web Information Systems and Mining, Taiyuan, China, pp. 302\u2013310 (2011)","DOI":"10.1007\/978-3-642-23982-3_37"},{"issue":"5","key":"1498_CR19","first-page":"1420","volume":"7","author":"H Guo","year":"2011","unstructured":"Guo, H., Chen, Q., Xin, C., Wang, X., Bi, Ye: A real environment oriented parallel duplicates removal approach for large scale Chinese webpages. J. Comput. Inf. Syst. 7(5), 1420\u20131427 (2011)","journal-title":"J. Comput. Inf. Syst."},{"issue":"4","key":"1498_CR20","doi-asserted-by":"publisher","first-page":"1493","DOI":"10.1007\/s10586-015-0477-1","volume":"18","author":"F Zhang","year":"2015","unstructured":"Zhang, F., Liu, M., Gui, F., Shen, W., Shami, Abdallah, Ma, Yunlong: A distributed frequent itemset mining algorithm using Spark for Big Data analytics. Clust. Comput. 18(4), 1493\u20131501 (2015)","journal-title":"Clust. Comput."},{"key":"1498_CR21","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1016\/j.websem.2011.05.004","volume":"10","author":"J Urbani","year":"2012","unstructured":"Urbani, J., Kotoulas, S., Maassen, J., Van Harmelen, F., Bal, H.: WebPIE: a web-scale parallel inference engine using MapReduce. Web Semant. 10, 59\u201375 (2012)","journal-title":"Web Semant."},{"key":"1498_CR22","first-page":"1751","volume":"10","author":"X Ben","year":"2015","unstructured":"Ben, X., Jia, D., Yuan, L.: A three layer distributed architecture for large-scale duplicated web page detection. Comput. Digital Eng. 10, 1751\u20131755 (2015)","journal-title":"Comput. Digital Eng."},{"key":"1498_CR23","doi-asserted-by":"crossref","unstructured":"Jose, J., Subramoni, H., Luo, M., et al.: Memcached design on high performance RDMA capable interconnects. In: The International Conference on Parallel Processing, Taipei, Taiwan, pp. 743\u2013752 (2011)","DOI":"10.1109\/ICPP.2011.37"},{"key":"1498_CR24","volume-title":"Garlson: Redis in Action","author":"L Josiah","year":"2013","unstructured":"Josiah, L.: Garlson: Redis in Action. Manning Publications Co., Greenwich (2013)"},{"issue":"2","key":"1498_CR25","doi-asserted-by":"publisher","first-page":"879","DOI":"10.1007\/s10586-016-0567-8","volume":"19","author":"R Subramanyam","year":"2016","unstructured":"Subramanyam, R., Gupta, I., Leslie, L.M., Wang, W.: Idempotent distributed counters using a forgetful bloom filter. Clust. Comput. 19(2), 879\u2013892 (2016)","journal-title":"Clust. Comput."},{"issue":"1","key":"1498_CR26","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1109\/SURV.2011.031611.00024","volume":"14","author":"S Tarkoma","year":"2011","unstructured":"Tarkoma, S., Rothenberg, C., Lagerspetz, E.: Theory and practice of bloom filters for distributed systems. IEEE Commun. Surv. Tutor. 14(1), 131\u2013155 (2011)","journal-title":"IEEE Commun. Surv. Tutor."},{"key":"1498_CR27","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1016\/j.is.2015.01.002","volume":"54","author":"A Crainiceanu","year":"2015","unstructured":"Crainiceanu, A., Lemire, D.: Bloofi: multidimensional Bloom filters. Inf. Syst. 54, 311\u2013324 (2015)","journal-title":"Inf. Syst."},{"issue":"5","key":"1498_CR28","first-page":"1195","volume":"9","author":"Y Wu","year":"2012","unstructured":"Wu, Y., Huang, H., Zhou, X., et al.: A space-saving URL duplication removal method for web crawler. J. Inf. Comput. Sci. 9(5), 1195\u20131203 (2012)","journal-title":"J. Inf. Comput. Sci."},{"issue":"2","key":"1498_CR29","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1007\/s10586-010-0144-5","volume":"14","author":"H Han","year":"2011","unstructured":"Han, H., Jung, H., Eom, H., et al.: Scatter-Gather-Merge: an efficient star-join query processing algorithm for data-parallel frameworks. Clust. Comput. 14(2), 183\u2013197 (2011)","journal-title":"Clust. Comput."},{"issue":"1","key":"1498_CR30","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1007\/s10586-015-0506-0","volume":"19","author":"M Alewiwi","year":"2016","unstructured":"Alewiwi, M., Orencik, C., Savas, E.: Efficient top-k similarity document search utilizing distributed file systems and cosine similarity. Clust. Comput. 19(1), 109\u2013126 (2016)","journal-title":"Clust. Comput."}],"container-title":["Cluster Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-017-1498-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10586-017-1498-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-017-1498-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,12]],"date-time":"2019-08-12T10:27:35Z","timestamp":1565605655000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10586-017-1498-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12,21]]},"references-count":30,"journal-issue":{"issue":"S1","published-print":{"date-parts":[[2019,1]]}},"alternative-id":["1498"],"URL":"https:\/\/doi.org\/10.1007\/s10586-017-1498-8","relation":{},"ISSN":["1386-7857","1573-7543"],"issn-type":[{"type":"print","value":"1386-7857"},{"type":"electronic","value":"1573-7543"}],"subject":[],"published":{"date-parts":[[2017,12,21]]},"assertion":[{"value":"29 August 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2017","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 December 2017","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 December 2017","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}