{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T18:56:18Z","timestamp":1757616978078,"version":"3.44.0"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"23","license":[{"start":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T00:00:00Z","timestamp":1725840000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T00:00:00Z","timestamp":1725840000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-20202-1","type":"journal-article","created":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T04:02:33Z","timestamp":1725854553000},"page":"26293-26330","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Predicting eye-tracking assisted web page segmentation"],"prefix":"10.1007","volume":"84","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5578-6177","authenticated-orcid":false,"given":"Abdullah","family":"Sulayfani","sequence":"first","affiliation":[]},{"given":"Sukru","family":"Eraslan","sequence":"additional","affiliation":[]},{"given":"Yeliz","family":"Yesilada","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,9]]},"reference":[{"key":"20202_CR1","doi-asserted-by":"publisher","unstructured":"Yesilada Y, Jay C, Stevens R, Harper S (2008) Validating the use and role of visual elements of web pages in navigation with an eye-tracking study. In: Proceedings of the 17th International Conference on World Wide Web. WWW \u201908. 
Association for Computing Machinery, New York, NY, USA, pp 11\u201320. https:\/\/doi.org\/10.1145\/1367497.1367500","DOI":"10.1145\/1367497.1367500"},{"key":"20202_CR2","unstructured":"Yesilada Y (2011) Web page segmentation: a review. Technical report, Middle East Technical University. http:\/\/emine.ncc.metu.edu.tr\/deliverables\/emine_D0.pdf"},{"key":"20202_CR3","doi-asserted-by":"publisher","unstructured":"Asakawa C, Takagi H (2000) Annotation-based transcoding for nonvisual web access. In: Proceedings of the Fourth International ACM Conference on Assistive Technologies. Assets \u201900. Association for Computing Machinery, New York, NY, USA, pp 172\u2013179. https:\/\/doi.org\/10.1145\/354324.354588","DOI":"10.1145\/354324.354588"},{"key":"20202_CR4","doi-asserted-by":"publisher","unstructured":"Lin S-H, Ho J-M (2002) Discovering informative content blocks from web documents. In: Proceedings of the Eighth ACM SIGKDD international conference on knowledge discovery and data mining. KDD \u201902. Association for Computing Machinery, New York, NY, USA, pp 588\u2013593. https:\/\/doi.org\/10.1145\/775047.775134","DOI":"10.1145\/775047.775134"},{"key":"20202_CR5","doi-asserted-by":"publisher","unstructured":"Ramaswamy L, Iyengar A, Liu L, Douglis F (2004) Automatic detection of fragments in dynamically generated web pages. In: Proceedings of the 13th International Conference on World Wide Web. WWW \u201904. Association for Computing Machinery, New York, NY, USA, pp 443\u2013454. https:\/\/doi.org\/10.1145\/988672.988732","DOI":"10.1145\/988672.988732"},{"key":"20202_CR6","doi-asserted-by":"publisher","unstructured":"Saad MB, Gan\u00e7arski S (2010) Using visual pages analysis for optimizing web archiving. In: Proceedings of the 2010 EDBT\/ICDT Workshops. EDBT \u201910. Association for Computing Machinery, New York, NY, USA. 
https:\/\/doi.org\/10.1145\/1754239.1754287","DOI":"10.1145\/1754239.1754287"},{"key":"20202_CR7","doi-asserted-by":"publisher","unstructured":"Wu O, Chen Y, Li B, Hu W (2011) Evaluating the visual quality of web pages using a computational aesthetic approach. WSDM \u201911. Association for Computing Machinery, New York, NY, USA, pp 337\u2013346. https:\/\/doi.org\/10.1145\/1935826.1935883","DOI":"10.1145\/1935826.1935883"},{"key":"20202_CR8","doi-asserted-by":"publisher","unstructured":"Akp\u0131nar ME, Ye\u015filada Y (2013) Vision based page segmentation algorithm: extended and perceived success. In: Sheng QZ, Kjeldskov J (eds) Current trends in web engineering. Springer, Cham. https:\/\/doi.org\/10.1007\/978-3-319-04244-2_22","DOI":"10.1007\/978-3-319-04244-2_22"},{"key":"20202_CR9","doi-asserted-by":"publisher","unstructured":"Drusch G, Bastien JMC (2012) Analyzing visual scanpaths on the web using the mean shift procedure and t-pattern detection: A bottom-up approach. In: Proceedings of the 2012 Conference on Ergonomie et Interaction Homme-Machine. Ergo\u2019IHM \u201912. Association for Computing Machinery, New York, NY, USA, pp 181\u2013184. https:\/\/doi.org\/10.1145\/2652574.2653432","DOI":"10.1145\/2652574.2653432"},{"issue":"11","key":"20202_CR10","doi-asserted-by":"publisher","first-page":"205","DOI":"10.21105\/joss.00205","volume":"2","author":"L McInnes","year":"2017","unstructured":"McInnes L, Healy J, Astels S (2017) hdbscan: Hierarchical density based clustering. J Open Source Softw 2(11):205","journal-title":"J Open Source Softw"},{"key":"20202_CR11","doi-asserted-by":"publisher","unstructured":"Eraslan S, Yesilada Y, Harper S (2020) The best of both worlds!: Integration of web page and eye tracking data driven approaches for automatic aoi detection. ACM Trans Web 14(1). 
https:\/\/doi.org\/10.1145\/3372497","DOI":"10.1145\/3372497"},{"key":"20202_CR12","doi-asserted-by":"publisher","unstructured":"Sanoja A, Gan\u00e7arski S (2014) Block-o-matic: A web page segmentation framework. In: 2014 International conference on multimedia computing and systems (ICMCS), pp 595\u2013600. https:\/\/doi.org\/10.1109\/ICMCS.2014.6911249","DOI":"10.1109\/ICMCS.2014.6911249"},{"issue":"1","key":"20202_CR13","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/s10209-020-00708-9","volume":"20","author":"S Eraslan","year":"2021","unstructured":"Eraslan S, Yesilada Y, Yaneva V, Ha LA (2021) Keep it simple!: an eye-tracking study for exploring complexity and distinguishability of web pages for people with autism. Univers Access Inf Soc 20(1):69\u201384","journal-title":"Univers Access Inf Soc"},{"key":"20202_CR14","doi-asserted-by":"publisher","unstructured":"Yesilada Y, Harper S, Eraslan S (2013) Experiential transcoding: an eyetracking approach. In: Proceedings of the 10th international cross-disciplinary conference on web accessibility. W4A \u201913. Association for Computing Machinery, New York, NY, USA. https:\/\/doi.org\/10.1145\/2461121.2461134","DOI":"10.1145\/2461121.2461134"},{"key":"20202_CR15","doi-asserted-by":"publisher","unstructured":"Trabzon IE, Yagiz F, Karadavut EE, Elhewahey M, Eraslan S, Yesilada Y, Harper S (2022) Framework for experiential transcoding of web pages with scanpath trend analysis. In: Proceedings of the 19th International Web for All Conference. W4A \u201922. Association for Computing Machinery, New York, NY, USA. https:\/\/doi.org\/10.1145\/3493612.3520450","DOI":"10.1145\/3493612.3520450"},{"key":"20202_CR16","doi-asserted-by":"publisher","unstructured":"Burget R, Rudolfova I (2009) Web page element classification based on visual features. In: 2009 First asian conference on intelligent information and database systems, pp 67\u201372. 
https:\/\/doi.org\/10.1109\/ACIIDS.2009.71","DOI":"10.1109\/ACIIDS.2009.71"},{"key":"20202_CR17","doi-asserted-by":"publisher","unstructured":"Baluja S (2006) Browsing on small screens: Recasting web-page segmentation into an efficient machine learning framework. In: Proceedings of the 15th International Conference on World Wide Web. WWW \u201906. Association for Computing Machinery, New York, NY, USA, pp 33\u201342. https:\/\/doi.org\/10.1145\/1135777.1135788","DOI":"10.1145\/1135777.1135788"},{"key":"20202_CR18","doi-asserted-by":"publisher","unstructured":"Chakrabarti D, Kumar R, Punera K (2008) A graph-theoretic approach to webpage segmentation. In: Proceedings of the 17th International conference on world wide web. WWW \u201908. Association for Computing Machinery, New York, NY, USA, pp 377\u2013386. https:\/\/doi.org\/10.1145\/1367497.1367549","DOI":"10.1145\/1367497.1367549"},{"key":"20202_CR19","doi-asserted-by":"publisher","unstructured":"Bing L, Guo R, Lam W, Niu Z-Y, Wang H (2014) Web page segmentation with structured prediction and its application in web page classification. In: Proceedings of the 37th International ACM SIGIR Conference on Research & Development in Information Retrieval. SIGIR \u201914. Association for Computing Machinery, New York, NY, USA, pp 767\u2013776. https:\/\/doi.org\/10.1145\/2600428.2609630","DOI":"10.1145\/2600428.2609630"},{"key":"20202_CR20","doi-asserted-by":"publisher","unstructured":"Feng H, Zhang W, Wu H, Wang C-J (2016) Web page segmentation and its application for web information crawling. In: 2016 IEEE 28th International conference on tools with artificial intelligence (ICTAI), pp 598\u2013605. 
https:\/\/doi.org\/10.1109\/ICTAI.2016.0097","DOI":"10.1109\/ICTAI.2016.0097"},{"issue":"4","key":"20202_CR21","doi-asserted-by":"publisher","first-page":"928","DOI":"10.1016\/j.ipm.2013.02.005","volume":"49","author":"E Uzun","year":"2013","unstructured":"Uzun E, Agun HV, Yerlikaya T (2013) A hybrid approach for extracting informative content from web pages. Inf Process Manage 49(4):928\u2013944. https:\/\/doi.org\/10.1016\/j.ipm.2013.02.005","journal-title":"Inf Process Manage"},{"key":"20202_CR22","doi-asserted-by":"crossref","unstructured":"Nethra K, Anitha J, Thilagavathi G (2014) Web content extraction using hybrid approach. ICTACT J Soft Comput 4(2)","DOI":"10.21917\/ijsc.2014.0099"},{"key":"20202_CR23","doi-asserted-by":"publisher","unstructured":"Kohlsch\u00fctter C, Fankhauser P, Nejdl W (2010) Boilerplate detection using shallow text features. In: Proceedings of the Third ACM international conference on web search and data mining. WSDM \u201910. Association for Computing Machinery, New York, NY, USA, pp 441\u2013450. https:\/\/doi.org\/10.1145\/1718487.1718542","DOI":"10.1145\/1718487.1718542"},{"issue":"1","key":"20202_CR24","first-page":"1","volume":"13","author":"H Sano","year":"2013","unstructured":"Sano H, Swezey RM, Shiramatsu S, Ozono T, Shintani T (2013) A web page segmentation method by using headlines to web. IJCSNS 13(1):1","journal-title":"IJCSNS"},{"key":"20202_CR25","unstructured":"Lones MA (2021) How to avoid machine learning pitfalls: a guide for academic researchers. arXiv:2108.02497"},{"key":"20202_CR26","volume-title":"Document object model","author":"J Marini","year":"2002","unstructured":"Marini J (2002) Document object model, 1st edn. McGraw-Hill Inc, USA","edition":"1"},{"key":"20202_CR27","unstructured":"Cai D, Yu S, Wen J-R, Ma W-Y (2003) Vips: a vision-based page segmentation algorithm. Technical report, Microsoft. 
https:\/\/www.microsoft.com\/en-us\/research\/publication\/vips-a-vision-based-page-segmentation-algorithm\/"},{"key":"20202_CR28","unstructured":"Puppeteer. https:\/\/github.com\/puppeteer. Accessed 03 July 2024"},{"key":"20202_CR29","doi-asserted-by":"publisher","unstructured":"Eraslan S, Yesilada Y, Harper S (2016) Scanpath trend analysis on web pages: clustering eye tracking scanpaths. ACM Trans Web 10(4). https:\/\/doi.org\/10.1145\/2970818","DOI":"10.1145\/2970818"},{"key":"20202_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijhcs.2020.102523","volume":"145","author":"E Michailidou","year":"2021","unstructured":"Michailidou E, Eraslan S, Yesilada Y, Harper S (2021) Automated prediction of visual complexity of web pages: tools and evaluations. Int J Hum Comput Stud 145:102523. https:\/\/doi.org\/10.1016\/j.ijhcs.2020.102523","journal-title":"Int J Hum Comput Stud"},{"issue":"6","key":"20202_CR31","doi-asserted-by":"publisher","first-page":"845","DOI":"10.1093\/iwc\/iwx015","volume":"29","author":"ME Akpinar","year":"2017","unstructured":"Akpinar ME, Ye\u015filada Y (2017) Discovering visual elements of web pages and their roles: users\u2019 perception. Interact Comput 29(6):845\u2013867","journal-title":"Interact Comput"},{"key":"20202_CR32","unstructured":"Gad AF (2020) Measuring text similarity using the levenshtein distance. Accessed 17 Jan 2023"},{"key":"20202_CR33","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa F, Varoquaux G, Gramfort A, Michel V, Thirion B, Grisel O, Blondel M, Prettenhofer P, Weiss R, Dubourg V, Vanderplas J, Passos A, Cournapeau D, Brucher M, Perrot M, Duchesnay E (2011) Scikit-learn: machine learning in python. 
J Mach Learn Res 12:2825\u20132830","journal-title":"J Mach Learn Res"},{"issue":"17","key":"20202_CR34","first-page":"1","volume":"18","author":"G Lema\u00eetre","year":"2017","unstructured":"Lema\u00eetre G, Nogueira F, Aridas CK (2017) Imbalanced-learn: a python toolbox to tackle the curse of imbalanced datasets in machine learning. J Mach Learn Res 18(17):1\u20135","journal-title":"J Mach Learn Res"},{"key":"20202_CR35","unstructured":"Brownlee J (2020) Random Oversampling and Undersampling for Imbalanced Classification. https:\/\/machinelearningmastery.com\/random-oversampling-and-undersampling-for-imbalanced-classification\/. Accessed 03 Mar 2023"},{"key":"20202_CR36","doi-asserted-by":"publisher","unstructured":"Christen P, Hand DJ, Kirielle N (2023) A review of the f-measure: Its history, properties, criticism, and alternatives. ACM Comput Surv 56(3). https:\/\/doi.org\/10.1145\/3606367","DOI":"10.1145\/3606367"},{"key":"20202_CR37","doi-asserted-by":"publisher","unstructured":"Ray S (2019) A quick review of machine learning algorithms. In: 2019 International conference on machine learning, big data, cloud and parallel computing (COMITCon), pp 35\u201339. https:\/\/doi.org\/10.1109\/COMITCon.2019.8862451","DOI":"10.1109\/COMITCon.2019.8862451"},{"key":"20202_CR38","unstructured":"Band A (2020) How to find the optimal value of K in KNN? https:\/\/towardsdatascience.com\/how-to-find-the-optimal-value-of-k-in-knn-35d936e554eb. 
Accessed 10 Feb 2023"},{"key":"20202_CR39","unstructured":"Abadi M, Agarwal A, Barham P, Brevdo E, Chen Z, Citro C, Corrado GS, Davis A, Dean J, Devin M, Ghemawat S, Goodfellow I, Harp A, Irving G, Isard M, Jia Y, Jozefowicz R, Kaiser L, Kudlur M, Levenberg J, Man\u00e9 D, Monga R, Moore S, Murray D, Olah C, Schuster M, Shlens J, Steiner B, Sutskever I, Talwar K, Tucker P, Vanhoucke V, Vasudevan V, Vi\u00e9gas F, Vinyals O, Warden P, Wattenberg M, Wicke M, Yu Y, Zheng X (2015) TensorFlow: Large-scale machine learning on heterogeneous systems. Software available from tensorflow.org. https:\/\/www.tensorflow.org\/. Accessed 17 Jan 2023"},{"key":"20202_CR40","doi-asserted-by":"publisher","unstructured":"McKinney W (2010) Data structures for statistical computing in python. In: van der Walt S, Millman J (eds) Proceedings of the 9th Python in Science Conference, pp 56\u201361. https:\/\/doi.org\/10.25080\/Majora-92bf1922-00a","DOI":"10.25080\/Majora-92bf1922-00a"},{"key":"20202_CR41","unstructured":"Klosterman S (2020) Why decision trees are more flexible than linear models. https:\/\/www.steveklosterman.com\/flexible-decision-trees\/. Accessed 18 Mar 2023"},{"key":"20202_CR42","doi-asserted-by":"publisher","DOI":"10.3389\/fnsys.2021.620558","volume":"15","author":"F Zhao","year":"2021","unstructured":"Zhao F, Zeng Y (2021) Dynamically optimizing network structure based on synaptic pruning in the brain. Front Syst Neurosci 15:620558","journal-title":"Front Syst Neurosci"},{"key":"20202_CR43","unstructured":"Brownlee J (2021) Undersampling algorithms for imbalanced classification. https:\/\/machinelearningmastery.com\/undersampling-algorithms-for-imbalanced-classification\/. Accessed 18 Mar 2023"},{"key":"20202_CR44","unstructured":"Difference between Padding, Margin, and Border properties. https:\/\/www.w3.org\/TR\/CSS2\/box.html. Accessed 17 Jan 2023"},{"key":"20202_CR45","unstructured":"Converting rgba values into one integer in Javascript (2013). 
https:\/\/stackoverflow.com\/questions\/17945972\/converting-rgba-values-into-one-integer-in-javascript. Accessed 17 Jan 2023"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20202-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-20202-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20202-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T22:12:19Z","timestamp":1757110339000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-20202-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,9]]},"references-count":45,"journal-issue":{"issue":"23","published-online":{"date-parts":[[2025,7]]}},"alternative-id":["20202"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-20202-1","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2024,9,9]]},"assertion":[{"value":"13 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 September 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Below are the declarations made by the 
authors.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The datasets used in this study were collected in previous studies, and the ethical approvals were taken from the University of Manchester for the first dataset [] and the University of Wolverhampton for the second one [].","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"The authors have no relevant financial or non-financial interests to disclose. All authors certify that they have no affiliations with or involvement in any organization or entity with any financial interest or non-financial interest in the subject matter or materials discussed in this manuscript. The authors have no financial or proprietary interests in any material discussed in this article.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}]}}