{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:31:32Z","timestamp":1772119892656,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T00:00:00Z","timestamp":1740528000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T00:00:00Z","timestamp":1740528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["World Wide Web"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s11280-025-01333-3","type":"journal-article","created":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T23:43:45Z","timestamp":1740527025000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A DOM-structural cohesion analysis approach for segmentation of modern web pages"],"prefix":"10.1007","volume":"28","author":[{"given":"Hieu","family":"Huynh","sequence":"first","affiliation":[]},{"given":"Quoc-Tri","family":"Le","sequence":"additional","affiliation":[]},{"given":"Vu","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Tien","family":"Nguyen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,26]]},"reference":[{"key":"1333_CR1","doi-asserted-by":"crossref","unstructured":"Alcic, S., Conrad, S.: Page segmentation by Web content clustering. In: Proceedings of the WIMS. pp.\u00a01\u20139 (2011)","DOI":"10.1145\/1988688.1988717"},{"key":"1333_CR2","doi-asserted-by":"crossref","unstructured":"Baluja, S.: Browsing on small screens: Recasting Web-page segmentation into an efficient machine learning framework. In: Proceedings of the Fifteenth International World Wide Web Conference. Edinburgh, Scotland (2006). http:\/\/www.esprockets.com\/papers\/www2006-2502-baluja.pdf","DOI":"10.1145\/1135777.1135788"},{"key":"1333_CR3","doi-asserted-by":"crossref","unstructured":"Bar-Yossef, Z., Rajagopalan, S.: Template detection via data mining and its applications. In: Proceedings of the 11th WWW. pp. 580\u2013591 (2002)","DOI":"10.1145\/511446.511522"},{"key":"1333_CR4","doi-asserted-by":"crossref","first-page":"102126","DOI":"10.1016\/j.is.2022.102126","volume":"112","author":"S Brisset","year":"2023","unstructured":"Brisset, S., Rouvoy, R., Seinturier, L., Pawlak, R.: Sftm: Fast matching of Web pages using similarity-based flexible tree matching. Inf. Syst. 112, 102126 (2023)","journal-title":"Inf. Syst."},{"key":"1333_CR5","doi-asserted-by":"crossref","unstructured":"Cai, D., He, X., Li, Z., Ma, W.Y., Wen, J.R.: Hierarchical clustering of www image search results using visual 01 (2004)","DOI":"10.1145\/1027527.1027747"},{"key":"1333_CR6","doi-asserted-by":"crossref","unstructured":"Cai, D., Yu, S., Wen, J.R., Ma, W.Y.: Extracting content structure for Web pages based on visual representation. In: Proceedings of the 5th APWeb. p. 406\u2013417. APWeb\u201903, Springer-Verlag, Berlin, Heidelberg (2003)","DOI":"10.1007\/3-540-36901-5_42"},{"key":"1333_CR7","unstructured":"Cai, D., Yu, S., Wen, J.R., Ma, W.Y.: Vips: a vision-based page segmentation algorithm 01 (2003)"},{"key":"1333_CR8","doi-asserted-by":"crossref","first-page":"679","DOI":"10.1109\/TPAMI.1986.4767851","volume":"6","author":"J Canny","year":"1986","unstructured":"Canny, J.: A computational approach to edge detection. IEEE Trans. Pattern Anal. Mach. Intell. 6, 679\u2013698 (1986)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1333_CR9","doi-asserted-by":"crossref","unstructured":"Chakrabarti, D., Kumar, R., Punera, K.: A graph-theoretic approach to webpage segmentation. In: Proceedings of the 17th WWW. pp. 377\u2013386 (2008)","DOI":"10.1145\/1367497.1367549"},{"key":"1333_CR10","doi-asserted-by":"publisher","unstructured":"Chakrabarti, D., Kumar, R., Punera, K.: A graph-theoretic approach to webpage segmentation. pp. 377\u2013386 (08 2008). https:\/\/doi.org\/10.1145\/1367497.1367549","DOI":"10.1145\/1367497.1367549"},{"key":"1333_CR11","doi-asserted-by":"crossref","unstructured":"Chen, K., Pang, J., Wang, J., Xiong, Y., Li, X., Sun, S., Feng, W., Liu, Z., Shi, J., Ouyang, W., et\u00a0al.: Hybrid task cascade for instance segmentation. In: CVPR. pp. 4974\u20134983 (2019)","DOI":"10.1109\/CVPR.2019.00511"},{"key":"1333_CR12","unstructured":"Chen, K., Wang, J., Pang, J., Cao, Y., Xiong, Y., Li, X., Sun, S., Feng, W., Liu, Z., Xu, J., et\u00a0al.: Mmdetection: Open mmlab detection toolbox and benchmark. arXiv:1906.07155 (2019)"},{"key":"1333_CR13","doi-asserted-by":"crossref","unstructured":"Cormer, M., Mann, R., Moffatt, K., Cohen, R.: Towards an improved vision-based Web page segmentation algorithm. In: 2017 14th CRV. pp. 345\u2013352. IEEE (2017)","DOI":"10.1109\/CRV.2017.38"},{"key":"1333_CR14","doi-asserted-by":"crossref","unstructured":"Huynh, H., Pham, N., Nguyen, T.N., Nguyen, V.: Segment-based test case prioritization: A multi-objective approach. In: Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis. pp. 1149\u20131160 (2024)","DOI":"10.1145\/3650212.3680349"},{"key":"1333_CR15","doi-asserted-by":"publisher","unstructured":"Jayashree, S.R., Dias, G., Andrew, J.J., Saha, S., Maurel, F., Ferrari, S.: Multimodal Web page segmentation using self-organized multi-objective clustering. ACM Trans. Inf. Syst. 40(3) (2022).https:\/\/doi.org\/10.1145\/3480966","DOI":"10.1145\/3480966"},{"issue":"3","key":"1333_CR16","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3326457","volume":"19","author":"Z Jiang","year":"2019","unstructured":"Jiang, Z., Yin, H., Wu, Y., Lyu, Y., Min, G., Zhang, X.: Constructing novel block layouts for webpage analysis. TOIT 19(3), 1\u201318 (2019)","journal-title":"TOIT"},{"key":"1333_CR17","doi-asserted-by":"publisher","unstructured":"Kiesel, J., Kneist, F., Meyer, L., Komlossy, K., Stein, B., Potthast, M.: Web page segmentation revisited: Evaluation framework and dataset. In: Proceedings of the 29th ACM CIKM. pp. 3047\u20133054. CIKM \u201920, Association for Computing Machinery, New York, NY, USA (2020). https:\/\/doi.org\/10.1145\/3340531.3412782","DOI":"10.1145\/3340531.3412782"},{"key":"1333_CR18","doi-asserted-by":"crossref","unstructured":"Kiesel, J., Meyer, L., Kneist, F., Stein, B., Potthast, M.: An empirical comparison of Web page segmentation algorithms. In: ECIR 2021, Virtual Event, March 28\u2013April 1, 2021, Proceedings, Part II 43. pp. 62\u201374. Springer (2021)","DOI":"10.1007\/978-3-030-72240-1_5"},{"key":"1333_CR19","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Mintun, E., Ravi, N., Mao, H., Rolland, C., Gustafson, L., Xiao, T., Whitehead, S., Berg, A.C., Lo, W.Y., Doll\u00e1r, P., Girshick, R.: Segment anything. arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"1333_CR20","doi-asserted-by":"crossref","unstructured":"Kohlsch\u00fctter, C., Nejdl, W.: A densitometric approach to Web page segmentation. In: Proceedings of the 17th ACM CIKM. pp. 1173\u20131182 (2008)","DOI":"10.1145\/1458082.1458237"},{"key":"1333_CR21","doi-asserted-by":"crossref","unstructured":"Lu, C., Bing, L., Lam, W.: Structured positional entity language model for enterprise entity retrieval. In: 22nd ACM CIKM. pp. 129\u2013138 (2013)","DOI":"10.1145\/2505515.2505702"},{"issue":"12","key":"1333_CR22","doi-asserted-by":"publisher","first-page":"1606","DOI":"10.14778\/2824032.2824058","volume":"8","author":"T Manabe","year":"2015","unstructured":"Manabe, T., Tajima, K.: Extracting logical hierarchical structure of html documents based on headings. Proc. VLDB Endow. 8(12), 1606\u20131617 (2015). https:\/\/doi.org\/10.14778\/2824032.2824058","journal-title":"Proc. VLDB Endow."},{"key":"1333_CR23","doi-asserted-by":"crossref","unstructured":"Manku, G.S., Jain, A., Das\u00a0Sarma, A.: Detecting near-duplicates for Web crawling. In: Proceedings of the 16th WWW. pp. 141\u2013150 (2007)","DOI":"10.1145\/1242572.1242592"},{"key":"1333_CR24","doi-asserted-by":"crossref","unstructured":"Meier, B., Stadelmann, T., Stampfli, J., Arnold, M., Cieliebak, M.: Fully convolutional neural networks for newspaper article segmentation. In: 2017 14th ICDAR. vol.\u00a01, pp. 414\u2013419. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.75"},{"key":"1333_CR25","doi-asserted-by":"crossref","unstructured":"Narayana, V., Premchand, P., Govardhan, A.: A novel and efficient approach for near duplicate page detection in Web crawling. In: 2009 IACC. pp. 1492\u20131496. IEEE (2009)","DOI":"10.1109\/IADCC.2009.4809238"},{"key":"1333_CR26","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1016\/j.is.2015.08.004","volume":"56","author":"M Pawlik","year":"2016","unstructured":"Pawlik, M., Augsten, N.: Tree edit distance: Robust and memory-efficient. Inf. Syst. 56, 157\u2013173 (2016). https:\/\/doi.org\/10.1016\/j.is.2015.08.004","journal-title":"Inf. Syst."},{"key":"1333_CR27","doi-asserted-by":"publisher","unstructured":"Sanoja, A., Gan\u00e7, S.: Block-o-matic: A Web page segmentation framework. In: 2014 ICMCS. pp. 595\u2013600 (2014). https:\/\/doi.org\/10.1109\/ICMCS.2014.6911249","DOI":"10.1109\/ICMCS.2014.6911249"},{"issue":"OOPSLA","key":"1333_CR28","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3485533","volume":"5","author":"T Su","year":"2021","unstructured":"Su, T., Yan, Y., Wang, J., Sun, J., Xiong, Y., Pu, G., Wang, K., Su, Z.: Fully automated functional fuzzing of android apps for detecting non-crashing logic bugs. Proc. ACM Program. Lang. 5(OOPSLA), 1\u201331 (2021)","journal-title":"Proc. ACM Program. Lang."},{"issue":"3","key":"1333_CR29","first-page":"173","volume":"4","author":"RP Velloso","year":"2013","unstructured":"Velloso, R.P., Dorneles, C.F.: Automatic Web page segmentation and noise removal for structured extraction using tag path sequences. JIDM 4(3), 173\u2013173 (2013)","journal-title":"JIDM"},{"key":"1333_CR30","doi-asserted-by":"crossref","unstructured":"Vieira, K., Da\u00a0Silva, A.S., Pinto, N., De\u00a0Moura, E.S., Cavalcanti, J.M., Freire, J.: A fast and robust method for Web page template detection and removal. In: 15th ACM CIKM. pp. 258\u2013267 (2006)","DOI":"10.1145\/1183614.1183654"},{"key":"1333_CR31","doi-asserted-by":"crossref","unstructured":"Xiang, P., Yang, X., Shi, Y.: Web page segmentation based on gestalt theory. In: 2007 IEEE ICME. pp. 2253\u20132256. IEEE (2007)","DOI":"10.1109\/ICME.2007.4285135"},{"key":"1333_CR32","unstructured":"Xie, X., Miao, G., Song, R., Wen, J.R., Ma, W.Y.: Efficient browsing of Web search results on mobile devices based on block importance model. In: 3rd IEEE PerCom. pp. 17\u201326. IEEE (2005)"},{"issue":"3","key":"1333_CR33","doi-asserted-by":"publisher","first-page":"1086","DOI":"10.1109\/TSE.2022.3171295","volume":"49","author":"RK Yandrapally","year":"2023","unstructured":"Yandrapally, R.K., Mesbah, A.: Fragment-based test generation for Web apps. IEEE Trans. Softw. Eng. 49(3), 1086\u20131101 (2023). https:\/\/doi.org\/10.1109\/TSE.2022.3171295","journal-title":"IEEE Trans. Softw. Eng."},{"key":"1333_CR34","doi-asserted-by":"crossref","unstructured":"Yandrapally, R., Sinha, S., Tzoref-Brill, R., Mesbah, A.: Carving ui tests to generate api tests and api specification. In: 2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE). pp. 1971\u20131982. IEEE (2023)","DOI":"10.1109\/ICSE48619.2023.00167"},{"key":"1333_CR35","doi-asserted-by":"crossref","unstructured":"Yi, L., Liu, B., Li, X.: Eliminating noisy information in Web pages for data mining. In: 9th KDD. pp. 296\u2013305 (2003)","DOI":"10.1145\/956750.956785"},{"key":"1333_CR36","doi-asserted-by":"crossref","unstructured":"Yin, X., Lee, W.S.: Understanding the function of Web elements for mobile content delivery using random walk models. In: Special interest tracks and posters of the 14th WWW. pp. 1150\u20131151 (2005)","DOI":"10.1145\/1062745.1062913"}],"container-title":["World Wide Web"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-025-01333-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11280-025-01333-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-025-01333-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T20:29:42Z","timestamp":1742675382000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11280-025-01333-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,26]]},"references-count":36,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["1333"],"URL":"https:\/\/doi.org\/10.1007\/s11280-025-01333-3","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-4392630\/v1","asserted-by":"object"}]},"ISSN":["1386-145X","1573-1413"],"issn-type":[{"value":"1386-145X","type":"print"},{"value":"1573-1413","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,26]]},"assertion":[{"value":"9 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 January 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 February 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 February 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"The authors declare no Conflict of interest.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"23"}}