{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T17:23:20Z","timestamp":1743009800793,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030192730"},{"type":"electronic","value":"9783030192747"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-19274-7_1","type":"book-chapter","created":{"date-parts":[[2019,4,25]],"date-time":"2019-04-25T18:09:50Z","timestamp":1556215790000},"page":"3-18","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Web Page Structured Content Detection Using Supervised Machine Learning"],"prefix":"10.1007","author":[{"given":"Roberto Panerai","family":"Velloso","sequence":"first","affiliation":[]},{"given":"Carina F.","family":"Dorneles","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,4,26]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Chen, T., Guestrin, C.: XGBoost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD (2016)","DOI":"10.1145\/2939672.2939785"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Cho, W.T., Lin, Y.M., Kao, H.Y.: Entropy-based visual tree evaluation on block extraction. In: Proceedings of the 2009 IEEE\/WIC\/ACM, pp. 580\u2013583. IEEE Computer Society (2009)","DOI":"10.1109\/WI-IAT.2009.98"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Fernandes, et al.: Computing block importance for searching on web sites. In: CIKM, pp. 165\u2013174. ACM (2007)","DOI":"10.1145\/1321440.1321466"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Fernandes, et al.: A site oriented method for segmenting web pages. In: SIGIR, pp. 215\u2013224. ACM (2011)","DOI":"10.1145\/2009916.2009949"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Gibson, D., Punera, K., Tomkins, A.: The volume and evolution of web page templates. In: WWW, pp. 830\u2013839. ACM (2005)","DOI":"10.1145\/1062745.1062763"},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Kohlsch\u00fctter, C., Fankhauser, P., Nejdl, W.: Boilerplate detection using shallow text features. In: WSDM, pp. 441\u2013450. ACM (2010)","DOI":"10.1145\/1718487.1718542"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Kohlsch\u00fctter, C., Nejdl, W.: A densitometric approach to web page segmentation. In: CIKM, pp. 1173\u20131182. ACM (2008)","DOI":"10.1145\/1458082.1458237"},{"key":"1_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1007\/978-3-030-03056-8_3","volume-title":"Current Trends in Web Engineering","author":"A Kravchenko","year":"2018","unstructured":"Kravchenko, A., Fayzrakhmanov, R.R., Sallinger, E.: Web page representations and data extraction with BERyL. In: Pautasso, C., S\u00e1nchez-Figueroa, F., Syst\u00e4, K., Murillo Rodr\u00edguez, J.M. (eds.) ICWE 2018. LNCS, vol. 11153, pp. 22\u201330. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-03056-8_3"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Kushmerick, N.: Learning to remove internet advertisements. In: Proceedings of the Third Annual Conference on Autonomous Agents, pp. 175\u2013181. ACM (1999)","DOI":"10.1145\/301136.301186"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Liu, B., Grossman, R., Zhai, Y.: Mining data records in web pages. In: SIGKDD, pp. 601\u2013606. ACM (2003)","DOI":"10.1145\/956750.956826"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Miao, G., Tatemura, J., Hsiung, W.P., Sawires, A., Moser, L.E.: Extracting data records from the web using tag path clustering. In: WWW, pp. 981\u2013990. ACM (2009)","DOI":"10.1145\/1526709.1526841"},{"key":"1_CR12","volume-title":"Discrete-Time Signal Processing","author":"AV Oppenheim","year":"1989","unstructured":"Oppenheim, A.V., et al.: Discrete-Time Signal Processing. Prentice Hall, Englewood Cliffs (1989)"},{"key":"1_CR13","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., et al.: Scikit-learn: machine learning in Python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011)","journal-title":"J. Mach. Learn. Res."},{"issue":"24","key":"1_CR14","doi-asserted-by":"publisher","first-page":"638","DOI":"10.21105\/joss.00638","volume":"3","author":"S Raschka","year":"2018","unstructured":"Raschka, S.: Mlxtend: providing machine learning and data science utilities and extensions to Python\u2019s scientific computing stack. J. Open Source Softw. 3(24), 638 (2018). https:\/\/doi.org\/10.21105\/joss.00638. http:\/\/joss.theoj.org\/papers\/10.21105\/joss.00638","journal-title":"J. Open Source Softw."},{"key":"1_CR15","doi-asserted-by":"crossref","unstructured":"Velloso, R.P., Dorneles, C.F.: Extracting records from the web using a signal processing approach. In: CIKM 2017 (2017)","DOI":"10.1145\/3132847.3132875"},{"issue":"3","key":"1_CR16","first-page":"173","volume":"4","author":"RP Velloso","year":"2013","unstructured":"Velloso, R.P., Dorneles, C.F.: Automatic web page segmentation and noise removal for structured extraction using tag path sequences. JIDM 4(3), 173 (2013)","journal-title":"JIDM"},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Vieira, K., et al.: A fast and robust method for web page template detection and removal. In: CIKM, pp. 258\u2013267. ACM (2006)","DOI":"10.1145\/1183614.1183654"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Yamada, Y., Craswell, N., Nakatoh, T., Hirokawa, S.: Testbed for information extraction from deep web. In: WWW, pp. 346\u2013347. ACM (2004)","DOI":"10.1145\/1010432.1010533"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Yi, L., Liu, B., Li, X.: Eliminating noisy information in web pages for data mining. In: SIGKDD, pp. 296\u2013305. ACM (2003)","DOI":"10.1145\/956750.956785"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Zheng, S., Song, R., Wen, J.R., Wu, D.: Joint optimization of wrapper generation and template detection. In: SIGKDD, pp. 894\u2013902. ACM (2007)","DOI":"10.1145\/1281192.1281287"}],"container-title":["Lecture Notes in Computer Science","Web Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-19274-7_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,25]],"date-time":"2024-04-25T00:01:51Z","timestamp":1714003311000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-19274-7_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030192730","9783030192747"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-19274-7_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"26 April 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICWE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Web Engineering","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 June 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 June 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icwe2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icwe2019.webengineering.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"106","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"26","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}