{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T03:18:42Z","timestamp":1743045522987,"version":"3.40.3"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031530241"},{"type":"electronic","value":"9783031530258"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-53025-8_15","type":"book-chapter","created":{"date-parts":[[2024,1,31]],"date-time":"2024-01-31T20:02:12Z","timestamp":1706731332000},"page":"197-216","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Text Chunking to\u00a0Improve Website Classification"],"prefix":"10.1007","author":[{"given":"Mohamed Zohir","family":"Koufi","sequence":"first","affiliation":[]},{"given":"Zahia","family":"Guessoum","sequence":"additional","affiliation":[]},{"given":"Amor","family":"Keziou","sequence":"additional","affiliation":[]},{"given":"Itheri","family":"Yahiaoui","sequence":"additional","affiliation":[]},{"given":"Chlo\u00e9","family":"Martineau","sequence":"additional","affiliation":[]},{"given":"Wandrille","family":"Domin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,1]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Bart\u00edk, V.: Text-based web page classification with use of visual information. In: 2010 International Conference on Advances in Social Networks Analysis and Mining, pp. 416\u2013420. IEEE (2010)","DOI":"10.1109\/ASONAM.2010.34"},{"key":"15_CR2","unstructured":"Beltagy, I., Peters, M.E., Cohan, A.: Longformer: the long-document transformer (2020)"},{"key":"15_CR3","unstructured":"Choromanski, K., et al.: Rethinking attention with performers. arXiv preprint arXiv:2009.14794 (2020)"},{"issue":"3\/4","key":"15_CR4","doi-asserted-by":"publisher","first-page":"256","DOI":"10.2307\/2332378","volume":"37","author":"WG Cochran","year":"1950","unstructured":"Cochran, W.G.: The comparison of percentages in matched samples. Biometrika 37(3\/4), 256\u2013266 (1950)","journal-title":"Biometrika"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Dai, Z., Yang, Z., Yang, Y., Carbonell, J., Le, Q.V.: Transformer-XL: attentive language models beyond a fixed-length context. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 2978\u20132988 (2019)","DOI":"10.18653\/v1\/P19-1285"},{"key":"15_CR6","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"15_CR7","series-title":"Proceedings in Adaptation, Learning and Optimization","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1007\/978-3-030-58989-9_5","volume-title":"Proceedings of ELM2019","author":"L Espinosa-Leal","year":"2021","unstructured":"Espinosa-Leal, L., Akusok, A., Lendasse, A., Bj\u00f6rk, K.-M.: Website classification from webpage renders. In: Cao, J., Vong, C.M., Miche, Y., Lendasse, A. (eds.) ELM 2019. PALO, vol. 14, pp. 41\u201350. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-58989-9_5"},{"issue":"4","key":"15_CR8","first-page":"238","volume":"6","author":"M Janaki Meena","year":"2011","unstructured":"Janaki Meena, M., Chandran, K., Karthik, A., Vijay Samuel, A.: A parallel ACO algorithm to select terms to categorise longer documents. Int. J. Comput. Sci. Eng. 6(4), 238\u2013248 (2011)","journal-title":"Int. J. Comput. Sci. Eng."},{"key":"15_CR9","unstructured":"Kitaev, N., Kaiser, u., Levskaya, A.: Adaptive attention span in transformers. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 2911\u20132922 (2020)"},{"key":"15_CR10","unstructured":"Kitaev, N., Kaiser, u., Levskaya, A.: Reformer: the efficient transformer. In: Proceedings of the 37th International Conference on Machine Learning, ICML 2020 (2020)"},{"key":"15_CR11","doi-asserted-by":"publisher","unstructured":"Kumar, J., Santhanavijayan, A., Janet, B., Rajendran, B., Bindhumadhava, B.: Phishing website classification and detection using machine learning. In: 2020 International Conference on Computer Communication and Informatics (ICCCI), pp. 1\u20136 (2020). https:\/\/doi.org\/10.1109\/ICCCI48352.2020.9104161","DOI":"10.1109\/ICCCI48352.2020.9104161"},{"key":"15_CR12","doi-asserted-by":"crossref","unstructured":"Kuncheva, L.I.: Combining Pattern Classifiers: Methods and Algorithms. Wiley (2014)","DOI":"10.1002\/9781118914564"},{"key":"15_CR13","unstructured":"Liu, Y., et al.: ROBERTa: a robustly optimized BERT pretraining approach. CoRR abs\/1907.11692 (2019). http:\/\/arxiv.org\/abs\/1907.11692"},{"issue":"5","key":"15_CR14","doi-asserted-by":"publisher","first-page":"5861","DOI":"10.1016\/j.eswa.2011.11.081","volume":"39","author":"MJ Meena","year":"2012","unstructured":"Meena, M.J., Chandran, K., Karthik, A., Samuel, A.V.: An enhanced ACO algorithm to select features for text categorization and its parallelization. Exp. Syst. Appl. 39(5), 5861\u20135871 (2012)","journal-title":"Exp. Syst. Appl."},{"issue":"3","key":"15_CR15","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1049\/iet-ifs.2013.0202","volume":"8","author":"RM Mohammad","year":"2014","unstructured":"Mohammad, R.M., Thabtah, F., McCluskey, L.: Intelligent rule-based phishing websites classification. IET Inf. Secur. 8(3), 153\u2013160 (2014)","journal-title":"IET Inf. Secur."},{"issue":"4","key":"15_CR16","doi-asserted-by":"publisher","first-page":"3407","DOI":"10.1016\/j.eswa.2010.08.126","volume":"38","author":"SA \u00d6zel","year":"2011","unstructured":"\u00d6zel, S.A.: A web page classification system based on a genetic algorithm using tagged-terms as features. Exp. Syst. Appl. 38(4), 3407\u20133415 (2011)","journal-title":"Exp. Syst. Appl."},{"key":"15_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1007\/978-3-319-11746-1_33","volume-title":"Web Information Systems Engineering \u2013 WISE 2014","author":"A Panwar","year":"2014","unstructured":"Panwar, A., Onut, I.-V., Miller, J.: Towards real time contextual advertising. In: Benatallah, B., Bestavros, A., Manolopoulos, Y., Vakali, A., Zhang, Y. (eds.) WISE 2014. LNCS, vol. 8787, pp. 445\u2013459. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-11746-1_33"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Qi, X., Davison, B.D.: Knowing a web page by the company it keeps. In: Proceedings of the 15th ACM International Conference on Information and Knowledge Management, pp. 228\u2013237 (2006)","DOI":"10.1145\/1183614.1183650"},{"issue":"2","key":"15_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1459352.1459357","volume":"41","author":"X Qi","year":"2009","unstructured":"Qi, X., Davison, B.D.: Web page classification: features and algorithms. ACM Comput. Surv. (CSUR) 41(2), 1\u201331 (2009)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Qiu, J., Ma, H., Levy, O., Yih, W., Wang, S., Tang, J.: Blockwise self-attention for long document understanding (2019)","DOI":"10.18653\/v1\/2020.findings-emnlp.232"},{"key":"15_CR21","unstructured":"Rae, J.W., Potapenko, A., Jayakumar, S.M., Lillicrap, T.P.: Compressive transformers for long-range sequence modelling. In: International Conference on Learning Representations (ICLR) (2020)"},{"key":"15_CR22","unstructured":"Reitermanova, Z.: Data splitting. In: WDS, vol. 10, pp. 31\u201336. MatfyzPress, Prague (2010)"},{"key":"15_CR23","doi-asserted-by":"crossref","unstructured":"Roy, A., Saffar, M., Vaswani, A., Grangier, D.: Efficient content-based sparse attention with routing transformers. arXiv preprint arXiv:2003.05997 (2020)","DOI":"10.1162\/tacl_a_00353"},{"key":"15_CR24","doi-asserted-by":"crossref","unstructured":"Shabudin, S., Sani, N.S., Ariffin, K.A.Z., Aliff, M.: Feature selection for phishing website classification. Int. J. Adv. Comput. Sci. Appl. 11(4) (2020)","DOI":"10.14569\/IJACSA.2020.0110477"},{"key":"15_CR25","first-page":"17472","volume":"6","author":"SD Vaghela","year":"2014","unstructured":"Vaghela, S.D., Patel, P.: Web page classification techniques - a comprehensive survey. IJIRSET 6, 17472\u201317479 (2014)","journal-title":"IJIRSET"},{"key":"15_CR26","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"15_CR27","unstructured":"Wang, S., Li, Z., Khabsa, M., Fang, H., Ma, H., Tang, J.: Linformer: self-attention with linear complexity. arXiv preprint arXiv:2006.04768 (2020)"},{"key":"15_CR28","unstructured":"Ye, Z., Guo, Q., Gan, Q., Qiu, X., Zhang, Z.: BP-transformer: modelling long-range context via binary partitioning. arXiv preprint arXiv:1911.04070 (2019)"},{"key":"15_CR29","unstructured":"Zaheer, M., et al.: Big bird: transformers for longer sequences (2020)"},{"issue":"11","key":"15_CR30","first-page":"1625","volume":"6","author":"S Zhong","year":"2011","unstructured":"Zhong, S., Zou, D.: Web page classification using an ensemble of support vector machine classifiers. J. Netw. 6(11), 1625 (2011)","journal-title":"J. Netw."}],"container-title":["Communications in Computer and Information Science","Optimization, Learning Algorithms and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-53025-8_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,31]],"date-time":"2024-01-31T20:15:21Z","timestamp":1706732121000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-53025-8_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031530241","9783031530258"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-53025-8_15","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"1 February 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"OL2A","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Optimization, Learning Algorithms and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ponta Delgada","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ol2a2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ol2a.ipb.pt\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"162","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"66","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}