{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T22:13:48Z","timestamp":1757542428291,"version":"3.37.3"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2019,11,9]],"date-time":"2019-11-09T00:00:00Z","timestamp":1573257600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,11,9]],"date-time":"2019-11-09T00:00:00Z","timestamp":1573257600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2020,4]]},"DOI":"10.1007\/s00530-019-00638-4","type":"journal-article","created":{"date-parts":[[2019,11,9]],"date-time":"2019-11-09T20:02:40Z","timestamp":1573329760000},"page":"201-221","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Automatic news-roundup generation using clustering, extraction, and presentation"],"prefix":"10.1007","volume":"26","author":[{"given":"Vincent","family":"Utomo","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7197-9912","authenticated-orcid":false,"given":"Jenq-Shiou","family":"Leu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,11,9]]},"reference":[{"key":"638_CR1","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1016\/j.eswa.2017.05.002","volume":"84","author":"LM Abualigah","year":"2017","unstructured":"Abualigah, L.M., Khader, A.T., Al-Betar, M.A., Alomari, O.A.: Text feature selection with a robust weight scheme and dynamic dimension reduction to text document clustering. Expert Syst. Appl. 84, 24\u201336 (2017)","journal-title":"Expert Syst. Appl."},{"key":"638_CR2","doi-asserted-by":"crossref","unstructured":"Adelberg, B.: Nodose. A tool for semi-automatically extracting semi-structured data from text documents. In: Proceedings of SIGMOD, pp. 283\u2013294 (1998)","DOI":"10.1145\/276305.276330"},{"key":"638_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.eswa.2017.11.055","volume":"96","author":"\u0130 Ar\u0131n","year":"2018","unstructured":"Ar\u0131n, \u0130., Erpam, M.K., Sayg\u0131n, Y.: I-TWEC: interactive clustering tool for Twitter. Expert Syst. Appl. 96, 1\u201313 (2018)","journal-title":"Expert Syst. Appl."},{"key":"638_CR4","unstructured":"Baumgartner, R. a. F. S. a. G. G.: Visual web information extraction with lixto. VLDB 2001. In: Proceedings of 27th International Conference on Very Large Data Bases\u201a September 11\u201314, Roma\u201a Italy (2001)"},{"key":"638_CR5","unstructured":"Cai, D., Yu, S., Wen, J.R., Ma, W.Y.: Vips: a vision-based page segmentation algorithm.\u00a0Microsoft Technical Report, MSR-TR-2003-79 (2003)"},{"key":"638_CR6","doi-asserted-by":"crossref","unstructured":"Carey, H. J., Manic, M.: HTML web content extraction using paragraph tags. In: IEEE 25th International Symposium on Industrial Electronics (ISIE), pp. 1099\u20131104 (2016)","DOI":"10.1109\/ISIE.2016.7745047"},{"key":"638_CR7","doi-asserted-by":"crossref","unstructured":"Chen, H., Dumais, S.: Bringing order to the web: automatically categorizing search results. In: Proceedings of the SIGCHI Conference on Human Factors in Computing Systems, pp. 145\u2013152 (2000)","DOI":"10.1145\/332040.332418"},{"key":"638_CR8","doi-asserted-by":"crossref","unstructured":"Gibson, D., Punera, K., Tomkins, A.: The volume and evolution of web page templates. In Special interest tracks and posters of the 14th international conference on World Wide Web, pp. 830\u2013839 (2005)","DOI":"10.1145\/1062745.1062763"},{"key":"638_CR9","doi-asserted-by":"crossref","unstructured":"Dalvi, N., Bohannon, P., Sha, F.: Robust web extraction: an approach based on a probabilistic tree-edit model. In: Proceedings of the 2009 ACM SIGMOD International Conference on Management of Data, pp. 335\u2013348 (2009)","DOI":"10.1145\/1559845.1559882"},{"issue":"4","key":"638_CR10","doi-asserted-by":"publisher","first-page":"364","DOI":"10.1093\/comjnl\/20.4.364","volume":"20","author":"D Defays","year":"1977","unstructured":"Defays, D.: An efficient algorithm for a complete link method. Comput J 20(4), 364\u2013366 (1977)","journal-title":"Comput J"},{"issue":"34","key":"638_CR11","first-page":"226","volume":"96","author":"M Ester","year":"1996","unstructured":"Ester, M., Kriegel, H.-P., Sander, J., Xu, X.: A density-based algorithm for discovering clusters in large spatial databases with noise. KDD 96(34), 226\u2013231 (1996)","journal-title":"KDD"},{"key":"638_CR12","unstructured":"Grangier, X.: Python-Goose. \nhttps:\/\/github.com\/grangier\/python-goose\n\n (2011)"},{"key":"638_CR13","doi-asserted-by":"crossref","unstructured":"Gupta, S., Kaiser, G., Neistadt, D., Grimm, P.: DOM-based content extraction of HTML documents. In: Proceedings of the 12th INTERNATIONAL CONFERENCE on World Wide Web, pp. 207\u2013214 (2003)","DOI":"10.1145\/775152.775182"},{"issue":"1","key":"638_CR14","first-page":"100","volume":"28","author":"JA Hartigan","year":"1979","unstructured":"Hartigan, J.A., Wong, M.A.: Algorithm AS 136: A k-means clustering algorithm. J. R. Stat. Soc. Ser. C (Appl. Stat.) 28(1), 100\u2013108 (1979)","journal-title":"J. R. Stat. Soc. Ser. C (Appl. Stat.)"},{"key":"638_CR15","unstructured":"Ifrim, G., Shi, B., Brigadir, I.: Event detection in twitter using aggressive filtering and hierarchical tweet clustering. Second Workshop on Social News on the Web (SNOW), Seoul, Korea, 8 April 2014 (2018)"},{"issue":"8","key":"638_CR16","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1016\/j.jlap.2013.01.002","volume":"82","author":"D Insa","year":"2013","unstructured":"Insa, D., Silva, J., Tamarit, S.: Using the words\/leafs ratio in the DOM tree for content extraction. J. Logic Algebr. Program 82(8), 311\u2013325 (2013)","journal-title":"J. Logic Algebr. Program"},{"key":"638_CR17","doi-asserted-by":"crossref","unstructured":"Ketchen Jr, D.J., Shook, C.L.: The application of cluster analysis in strategic management research: an analysis and critique. Strateg. Manag. J 17(6) 441\u2013458 (1996)","DOI":"10.1002\/(SICI)1097-0266(199606)17:6<441::AID-SMJ819>3.0.CO;2-G"},{"key":"638_CR18","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139924801","volume-title":"Mining of Massive Datasets","author":"J Leskovec","year":"2014","unstructured":"Leskovec, J., Rajaraman, A., Ullman, J.D.: Mining of Massive Datasets. Cambridge University Press, England (2014)"},{"issue":"5323","key":"638_CR19","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1038\/234034a0","volume":"234","author":"M Levandowsky","year":"1971","unstructured":"Levandowsky, M., Winter, D.: Distance between sets. Nature 234(5323), 34\u201335 (1971)","journal-title":"Nature"},{"key":"638_CR20","unstructured":"Liu, L., Pu, C., Han, W.: XWRAP: an XML-enabled wrapper construction system for web information sources. In: Proceedings. 16th International Conference on Data Engineering, pp. 611\u2013621 (2000)"},{"key":"638_CR21","doi-asserted-by":"crossref","unstructured":"Lov\u00e1sz, L., Plummer, M.: Matching theory. Vol. 367. American Mathematical Soc (2009)","DOI":"10.1090\/chel\/367"},{"key":"638_CR22","doi-asserted-by":"crossref","unstructured":"Ma, L., Goharian, N., Chowdhury, A., Chung, M.: Extracting unstructured data from template generated web documents. In: Proceedings of the Twelfth International Conference on Information and Knowledge Management, pp. 512\u2013515 (2003)","DOI":"10.1145\/956863.956961"},{"issue":"5","key":"638_CR23","doi-asserted-by":"publisher","first-page":"635","DOI":"10.1016\/S1389-1286(02)00214-1","volume":"39","author":"J Myllymaki","year":"2002","unstructured":"Myllymaki, J.: Effective web data extraction with standard XML technologies. Comput. Netw. 39(5), 635\u2013644 (2002)","journal-title":"Comput. Netw."},{"key":"638_CR24","unstructured":"Nenkova, A., Vanderwende, L.: The impact of frequency on summarization. Microsoft Research, Redmond, Washington, Tech. Rep. MSR-TR-2005, Volume 101 (2005)"},{"key":"638_CR25","unstructured":"Palacios, R.: Eatiht.\n\nhttp:\/\/rodricios.github.io\/eatiht\/\n\n (2015)"},{"key":"638_CR26","doi-asserted-by":"crossref","unstructured":"Parameswaran, A., Dalvi, N., Garcia-Molina, H., Rastogi, R.: Optimal schemes for robust web extraction. In: Proceedings of the VLDB Conference, Vol. 4 No. 11\u00a0VLDB Endowment, pp. 980\u2013991 (2011)","DOI":"10.14778\/3402707.3402735"},{"key":"638_CR27","unstructured":"Rosa, K.D. et al.: Topical clustering of tweets. Proceedings of the ACM SIGIR: SWSM (2011)"},{"key":"638_CR28","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/0377-0427(87)90125-7","volume":"20","author":"PJ Rousseeuw","year":"1987","unstructured":"Rousseeuw, P.J.: Silhouettes: a graphical aid to the interpretation and validation of cluster analysis. J. Comput. Appl. Math. 20, 53\u201365 (1987)","journal-title":"J. Comput. Appl. Math."},{"key":"638_CR29","doi-asserted-by":"crossref","unstructured":"Sanoja, A., Gancarski, S.: Block-o-matic: a web page segmentation framework. In: 2014 International Conference on Multimedia Computing and Systems (ICMCS), pp. 595\u2013600 (2014)","DOI":"10.1109\/ICMCS.2014.6911249"},{"issue":"3","key":"638_CR30","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1145\/3068335","volume":"42","author":"E Schubert","year":"2017","unstructured":"Schubert, E., et al.: DBSCAN revisited, revisited: why and how you should (still) use DBSCAN. ACM Trans. Database Syst. (TODS) 42(3), 19 (2017)","journal-title":"ACM Trans. Database Syst. (TODS)"},{"key":"638_CR31","doi-asserted-by":"crossref","unstructured":"Sharifi, B., Hutton, M.-A., Kalita, J.K.: Experiments in microblog summarization. In: 2010 IEEE Second International Conference on Social Computing (SocialCom), pp. 49\u201356 (2010)","DOI":"10.1109\/SocialCom.2010.17"},{"issue":"4","key":"638_CR32","first-page":"35","volume":"24","author":"A Singhal","year":"2001","unstructured":"Singhal, A.: Modern information retrieval: a brief overview. IEEE Data Eng. Bull. 24(4), 35\u201343 (2001)","journal-title":"IEEE Data Eng. Bull."},{"issue":"1","key":"638_CR33","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1007\/s10115-013-0687-x","volume":"42","author":"D Song","year":"2015","unstructured":"Song, D., Sun, F., Liao, L.: A hybrid approach for content extraction with text density and visual importance of DOM nodes. Knowl. Inf. Syst. 42(1), 75\u201396 (2015)","journal-title":"Knowl. Inf. Syst."},{"key":"638_CR34","doi-asserted-by":"crossref","unstructured":"Sun, F., Song, D., Liao, L.: Dom based content extraction via text density. In: Proceedings of the 34th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 245\u2013254 (2011)","DOI":"10.1145\/2009916.2009952"},{"issue":"17","key":"638_CR35","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1145\/2897350.2897353","volume":"2","author":"T Weninger","year":"2016","unstructured":"Weninger, T., Palacios, R., Crescenzi, V., Gottron, T., Merialdo, P.: Web content extraction: a MetaAnalysis of its past and thoughts on its future. ACM SIGKDD Explor. Newsl 2(17), 17\u201323 (2016)","journal-title":"ACM SIGKDD Explor. Newsl"},{"key":"638_CR36","doi-asserted-by":"crossref","unstructured":"Weninger, T., Hsu, W. H., Han, J.: CETR: content extraction via tag ratios. In: Proceedings of the 19th International Conference on World Wide Web, pp. 971\u2013980 (2010)","DOI":"10.1145\/1772690.1772789"},{"key":"638_CR37","unstructured":"Utomo, V., Leu, J.-S.: Unpublished. Subject-Assisted Extraction: Looking at Web Content Extraction from Different Side"},{"issue":"301","key":"638_CR38","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1080\/01621459.1963.10500845","volume":"58","author":"JH Ward Jr","year":"1963","unstructured":"Ward Jr., J.H.: Hierarchical grouping to optimize an objective function. J. Am. Stat. Assoc. 58(301), 236\u2013244 (1963)","journal-title":"J. Am. Stat. Assoc."},{"key":"638_CR39","doi-asserted-by":"crossref","unstructured":"Wu, S., Liu, J., Fan, J.: Automatic web content extraction by combination of learning and grouping. In: Proceedings of the 24th International Conference on World Wide Web, pp. 1264\u20131274 (2015)","DOI":"10.1145\/2736277.2741659"},{"key":"638_CR40","unstructured":"Xie, P., Xing, E.P.: Integrating document clustering and topic modeling. arXiv preprint\n\narXiv:1309.6874\n\n(2013)"},{"key":"638_CR41","doi-asserted-by":"crossref","unstructured":"Xu, W., Liu, X., Gong, Y.: Document clustering based on non-negative matrix factorization. In: Proceedings of the 26th Annual International ACM SIGIR Conference on Research and Development in Informaion Retrieval, pp. 267\u2013273 (2003)","DOI":"10.1145\/860435.860485"},{"issue":"11","key":"638_CR42","doi-asserted-by":"publisher","first-page":"1361","DOI":"10.1016\/S1389-1286(99)00054-7","volume":"31","author":"O Zamir","year":"1999","unstructured":"Zamir, O., Etzioni, O.: Grouper: a dynamic clustering interface to Web search results. Comput. Netw. 31(11), 1361\u20131374 (1999)","journal-title":"Comput. Netw."},{"key":"638_CR43","doi-asserted-by":"crossref","unstructured":"Zeng, H.-J. et al.: Learning to cluster web search results. In: Proceedings of the 27th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 210\u2013217 (2004)","DOI":"10.1145\/1008992.1009030"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-019-00638-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00530-019-00638-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-019-00638-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,8]],"date-time":"2020-11-08T00:37:39Z","timestamp":1604795859000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00530-019-00638-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11,9]]},"references-count":43,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2020,4]]}},"alternative-id":["638"],"URL":"https:\/\/doi.org\/10.1007\/s00530-019-00638-4","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2019,11,9]]},"assertion":[{"value":"20 March 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 October 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 November 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}