{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T09:47:49Z","timestamp":1766137669820,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030731960"},{"type":"electronic","value":"9783030731977"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-73197-7_14","type":"book-chapter","created":{"date-parts":[[2021,4,6]],"date-time":"2021-04-06T19:03:01Z","timestamp":1617735781000},"page":"219-234","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Semi-structured Data Classification Model with Integrating Tag Sequence and Ngram"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2306-9823","authenticated-orcid":false,"given":"Lijun","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Ning","family":"Li","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Zhanhuai","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,4,6]]},"reference":[{"issue":"1","key":"14_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1142\/S0218213017600028","volume":"26","author":"G Costa","year":"2017","unstructured":"Costa, G., Ortale, R.: XML clustering by structure-constrained phrases: a fully-automatic approach using contextualized N-Grams. Int. J. Artif. Intell. Tools 26(1), 1\u201324 (2017)","journal-title":"Int. J. Artif. Intell. Tools"},{"doi-asserted-by":"crossref","unstructured":"Costa, G., Ortale, R.: Fully-automatic XML clustering by structure-constrained phrases. In: Proceedings IEEE 27th International Conference on Tools with Artificial Intelligence, Vietri sul Mare, Italy, pp. 146\u2013153 (2015)","key":"14_CR2","DOI":"10.1109\/ICTAI.2015.34"},{"issue":"6","key":"14_CR3","doi-asserted-by":"publisher","first-page":"1383","DOI":"10.1109\/TKDE.2016.2525768","volume":"28","author":"J Tekli","year":"2016","unstructured":"Tekli, J.: An overview on XML semantic disambiguation from unstructured text to semi-structured data: background, applications, and ongoing challenges. IEEE Trans. Knowl. Data Eng. 28(6), 1383\u20131407 (2016)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"1","key":"14_CR4","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1007\/s10115-015-0820-0","volume":"46","author":"M Piernik","year":"2015","unstructured":"Piernik, M., Brzezinski, D., Morzy, T.: Clustering XML documents by patterns. Knowl. Inf. Syst. 46(1), 185\u2013212 (2015). https:\/\/doi.org\/10.1007\/s10115-015-0820-0","journal-title":"Knowl. Inf. Syst."},{"key":"14_CR5","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1016\/j.neucom.2015.02.095","volume":"174","author":"X Zhao","year":"2016","unstructured":"Zhao, X., Bi, X., Wang, G., et al.: Uncertain XML documents classification using extreme learning machine. Neurocomputing 174, 375\u2013382 (2016)","journal-title":"Neurocomputing"},{"doi-asserted-by":"crossref","unstructured":"Costa, G., Ortale, R.: Mining cluster patterns in XML corpora via latent topic models of content and structure. In: Proceedings 23rd Pacific-Asia Conference on Knowledge Discovery and Data Mining, Macau, China, pp. 237\u2013248 (2019)","key":"14_CR6","DOI":"10.1007\/978-3-030-16142-2_19"},{"unstructured":"Tran, T., Nayak, R., Bruza, P.D.: Combining structure and content similarities for XML document clustering. In: Proceeedings the 7th Australasian Data Mining Conference (AusDM 2008), pp. 219\u2013226 (2008)","key":"14_CR7"},{"doi-asserted-by":"crossref","unstructured":"Ghosh, S., Mitra, P.: Combining content and structure similarity for XML document classification using composite SVM Kernels. In: Proceedings 19th International Conference on Pattern Recognition (ICPR 2008), pp. 1\u20134 (2008)","key":"14_CR8","DOI":"10.1109\/ICPR.2008.4761539"},{"key":"14_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1007\/978-3-642-16720-1_12","volume-title":"Web-Age Information Management","author":"L Zhang","year":"2010","unstructured":"Zhang, L., Li, Z., Chen, Q., Li, N.: Structure and content similarity for clustering XML documents. In: Shen, H.T., et al. (eds.) WAIM 2010. LNCS, vol. 6185, pp. 116\u2013124. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-16720-1_12"},{"doi-asserted-by":"crossref","unstructured":"Yuan, J., Xu, D., Bao, H.: An efficient XML documents classification method based on structure and keywords frequency. J. Comput. Res. Dev. 43(8), 1361\u20131367 (2006)","key":"14_CR10","DOI":"10.1360\/crad20060808"},{"doi-asserted-by":"crossref","unstructured":"Costa, G., Ortale, R., Ritacco, E.: Effective XML classification using content and structural information via rule learning. In: Proceedings the 23rd IEEE International Conference on Tools with Artificial Intelligence (ICTAI 2011), pp. 102\u2013109 (2011)","key":"14_CR11","DOI":"10.1109\/ICTAI.2011.24"},{"doi-asserted-by":"crossref","unstructured":"Yang, J., Zhang, F.: XML document classification using extended VSM. In: Proceedings 6th International Workshop of the Initiative for the Evaluation of XML Retrieval, pp. 234\u2013244 (2008)","key":"14_CR12","DOI":"10.1007\/978-3-540-85902-4_21"},{"doi-asserted-by":"crossref","unstructured":"Yang, J., Wang, S.: Extended VSM for XML document classification using frequent subtrees. In: Proceedings 8th International Workshop of the Initiative for the Evaluation of XML Retrieval, pp. 441\u2013448 (2009)","key":"14_CR13","DOI":"10.1007\/978-3-642-14556-8_44"},{"issue":"5","key":"14_CR14","doi-asserted-by":"publisher","first-page":"1217","DOI":"10.1007\/s11280-013-0230-8","volume":"17","author":"X Zhao","year":"2013","unstructured":"Zhao, X., Bi, X., Qiao, B.: Probability based voting extreme learning machine for multiclass XML documents classification. World Wide Web 17(5), 1217\u20131231 (2013). https:\/\/doi.org\/10.1007\/s11280-013-0230-8","journal-title":"World Wide Web"},{"issue":"1","key":"14_CR15","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1007\/s10791-017-9314-x","volume":"21","author":"G Costa","year":"2017","unstructured":"Costa, G., Ortale, R.: Machine learning techniques for XML (co-)clustering by structure-constrained phrases. Inf. Retrieval J. 21(1), 24\u201355 (2017). https:\/\/doi.org\/10.1007\/s10791-017-9314-x","journal-title":"Inf. Retrieval J."},{"unstructured":"Mladenic, D., Globelnik, M.: Word sequences as features in text learning. the 17th Electrotechnical and Computer Science Conference (ERK 1998), Slovenia, pp. 145\u2013148 (1998)","key":"14_CR16"},{"unstructured":"Furnkranz, J.: A Study Using n-gram features for text categorization. Austrian Res. Instit. Artif. Intell. 3, 1\u201310 (1998)","key":"14_CR17"},{"unstructured":"Zhang, Y., Zhang, L., Yan, J., Li, Z.: Using association features to enhance the performance of Naive Bayes text classifier. In: Proceedings the 5th International Conference on Computational Intelligence and Multimedia Applications, pp. 336\u2013441 (2003)","key":"14_CR18"},{"doi-asserted-by":"crossref","unstructured":"Meretakis, D., Wuthrich, B.: Extending Naive Bayes classifiers using long itemsets. In: Proceedings the 5th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (SIGKDD 1999), pp. 165\u2013174 (1999)","key":"14_CR19","DOI":"10.1145\/312129.312222"},{"doi-asserted-by":"crossref","unstructured":"Tesar, R., Strnad, V., Jezek, K., Poesio, M.: Extending the single words-based document model: a comparison of bigrams and 2-itemsets. In: Proceedings the ACM Symposium on Document Engineering, pp. 138\u2013146 (2006)","key":"14_CR20","DOI":"10.1145\/1166160.1166197"},{"key":"14_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1007\/978-3-642-29253-8_8","volume-title":"Web Technologies and Applications","author":"L Zhang","year":"2012","unstructured":"Zhang, L., Li, Z., Chen, Q., Li, X., Li, N., Lou, Y.: Mining frequent association tag sequences for clustering XML documents. In: Sheng, Q.Z., Wang, G., Jensen, C.S., Xu, G. (eds.) APWeb 2012. LNCS, vol. 7235, pp. 85\u201396. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-29253-8_8"},{"unstructured":"Caropreso, M.F., Matwin, S., Sebastiani, F.: Statistical phrases in automated text categorization. Technical report IEI-B4-07-2000. Istituto di Elaborazione dell\u2019Informazione, Pisa, Italy (2000)","key":"14_CR22"},{"unstructured":"Mitra, M., Buckley, C., Singhal, A., Cardie, C: An analysis of statistical and syntactic phrases. In: The 5th International Conference on Recherche d\u2019Information Assistee par Ordinateur (RIAO 1997), Montreal, CA, pp. 200\u2013214 (1997)","key":"14_CR23"},{"doi-asserted-by":"crossref","unstructured":"Dumais, S.T., Platt, J., Heckerman, D., Sahami, M.: Inductive learning algorithms and representations for text categorization. In: The 7th ACM International Conference on Information and Knowledge Management (CIKM 1998), New York, US, pp. 148\u2013155. ACM Press (1998)","key":"14_CR24","DOI":"10.1145\/288627.288651"},{"key":"14_CR25","first-page":"121","volume":"35","author":"R Tesar","year":"2005","unstructured":"Tesar, R., Fiala, D., Rousselot, F., Jezek, K.: A comparison of two algorithms for discovering repeated word sequences. WIT transaction on information and communication technologies 35, 121\u2013131 (2005)","journal-title":"WIT transaction on information and communication technologies"},{"unstructured":"Yang, Y., Pedersen, J.O.: A comparative study on feature selection in text categorization. In: The 14th International Conference on Machine Learning (ICML 1997), pp. 412\u2013420 (1997)","key":"14_CR26"},{"doi-asserted-by":"crossref","unstructured":"Rezk, N.G., Sarhan, A., Algergawy, A.: Clustering of XML documents based on structure and aggregated content. In: Proceedings 11th International Conference on Computer Engineering and Systems, Cairo, Egypt, pp. 93\u2013102 (2016)","key":"14_CR27","DOI":"10.1109\/ICCES.2016.7821981"},{"key":"14_CR28","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1145\/1394251.1394255","volume":"42","author":"L Denoyer","year":"2008","unstructured":"Denoyer, L., Gallinari, P.: Report on the XML mining track at INEX 2007 categorization and clustering of XML documents. SIGIR forum 42, 22\u201328 (2008)","journal-title":"SIGIR forum"},{"key":"14_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/11730262_6","volume-title":"Knowledge Discovery from XML Documents","author":"A Kurt","year":"2006","unstructured":"Kurt, A., Tozal, E.: Classification of XSLT-generated web documents with support vector machines. In: Nayak, R., Zaki, M.J. (eds.) KDXD 2006. LNCS, vol. 3915, pp. 33\u201342. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11730262_6"},{"doi-asserted-by":"crossref","unstructured":"Wu, J., Tang, J.: A bottom-up approach for XML documents classification. In: The 2008 International Symposium on Database Engineering and Applications, Coimbra, Portugal, pp. 131\u2013137. ACM (2008)","key":"14_CR30","DOI":"10.1145\/1451940.1451960"},{"unstructured":"Zhang, L., Li, Z., Chen, Q., et al.: Classifying XML documents based on term semantics. Jilin Daxue Xuebao\/J. Jilin Univ. (Eng. Technol. Edn.) 42(6), 1510\u20131514 (2012)","key":"14_CR31"}],"container-title":["Lecture Notes in Computer Science","Database Systems for Advanced Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-73197-7_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,8,7]],"date-time":"2021-08-07T15:14:02Z","timestamp":1628349242000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-73197-7_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030731960","9783030731977"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-73197-7_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"6 April 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DASFAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database Systems for Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taipei","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiwan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 April 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 April 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasfaa2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/dm.iis.sinica.edu.tw\/DASFAA2021\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"490","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"98","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"20% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic this event was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}