{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,9]],"date-time":"2025-11-09T03:55:38Z","timestamp":1762660538359,"version":"3.40.3"},"publisher-location":"Cham","reference-count":60,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031810091"},{"type":"electronic","value":"9783031810107"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-81010-7_3","type":"book-chapter","created":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T04:52:26Z","timestamp":1740459146000},"page":"35-59","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Framework for\u00a0the\u00a0Classification and\u00a0Exploration of\u00a0Semi-structured Data"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-9719-6735","authenticated-orcid":false,"given":"Louis Willem","family":"Burger","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4757-5832","authenticated-orcid":false,"given":"Jan","family":"van Vuuren","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,26]]},"reference":[{"issue":"4","key":"3_CR1","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1002\/wics.101","volume":"2","author":"H Abdi","year":"2010","unstructured":"Abdi, H., Williams, L.J.: Principal component analysis. Wiley Interdisc. Rev. Comput. Stat. 2(4), 433\u2013459 (2010)","journal-title":"Wiley Interdisc. Rev. Comput. Stat."},{"key":"3_CR2","volume-title":"Data on the Web: From Relations to Semistructured Data and XML","author":"S Abiteboul","year":"2000","unstructured":"Abiteboul, S., Buneman, P., Suciu, D.: Data on the Web: From Relations to Semistructured Data and XML. Morgan Kaufmann Publishers, Burlington (2000)"},{"key":"3_CR3","volume-title":"Data Clustering: Algorithms and Applications","author":"CC Aggarwal","year":"2014","unstructured":"Aggarwal, C.C., Reddy, C.K.: Data Clustering: Algorithms and Applications. CRC Press, Boca Raton (2014)"},{"key":"3_CR4","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-3223-4","volume-title":"Mining Text Data","author":"CC Aggarwal","year":"2012","unstructured":"Aggarwal, C.C., Zhai, C.X.: Mining Text Data. Springer, New York (2012)"},{"issue":"2","key":"3_CR5","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1016\/j.datak.2007.03.016","volume":"63","author":"A Ahmad","year":"2007","unstructured":"Ahmad, A., Dey, L.: A k-mean clustering algorithm for mixed numeric and categorical data. Data Knowl. Eng. 63(2), 503\u2013527 (2007)","journal-title":"Data Knowl. Eng."},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Bibaeva, V.: Using metaheuristics for hyper-parameter optimization of convolutional neural networks. In: Proceedings of the 28th IEEE International Workshop on Machine Learning for Signal Processing, pp.\u00a01\u20136. Aalborg (2018)","DOI":"10.1109\/MLSP.2018.8516989"},{"key":"3_CR7","volume-title":"Pattern Recognition and Machine Learning","author":"CM Bishop","year":"2006","unstructured":"Bishop, C.M.: Pattern Recognition and Machine Learning, vol. 4. Springer, New York (2006)"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Blei, D.M., Jordan, M.I.: Modeling annotated data. In: Proceedings of the 26th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 127\u2013134. Toronto (2003)","DOI":"10.1145\/860435.860460"},{"key":"3_CR9","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-4884-5","volume-title":"Principles of Data Mining","author":"M Bramer","year":"2013","unstructured":"Bramer, M.: Principles of Data Mining, vol. 2. Springer, London (2013)"},{"key":"3_CR10","volume-title":"Classification and Regression Trees","author":"L Breiman","year":"1984","unstructured":"Breiman, L., Friedman, J.H., Olshen, R.A., Stone, C.J.: Classification and Regression Trees. Wadsworth International Group, Belmont (1984)"},{"issue":"1","key":"3_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1080\/03610927408827101","volume":"3","author":"T Cali\u0144ski","year":"1974","unstructured":"Cali\u0144ski, T., Harabasz, J.: A dendrite method for cluster analysis. Commun. Stat.-Theory Methods 3(1), 1\u201327 (1974)","journal-title":"Commun. Stat.-Theory Methods"},{"key":"3_CR12","doi-asserted-by":"publisher","DOI":"10.1515\/9783112316009","volume-title":"Syntactic Structures","author":"N Chomsky","year":"1957","unstructured":"Chomsky, N.: Syntactic Structures. Mouton & Co., The Hague (1957)"},{"key":"3_CR13","unstructured":"Davenport, T., Kirby, J.: Strategies for remaining gainfully employed in an era of very smart machines (2015). https:\/\/hbr.org\/2015\/06\/beyond-automation. Accessed 01 Dec 2022"},{"issue":"2","key":"3_CR14","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1109\/TPAMI.1979.4766909","volume":"1","author":"DL Davies","year":"1979","unstructured":"Davies, D.L., Bouldin, D.W.: A cluster separation measure. IEEE Trans. Pattern Anal. Mach. Intell. 1(2), 224\u2013227 (1979)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3_CR15","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 17th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 4171\u20134186. Minneapolis (MN) (2019)"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Elkin, P.L., et al.: Using artificial intelligence with natural language processing to combine electronic health record\u2019s structured and free text data to identify nonvalvular atrial fibrillation to decrease strokes and death: evaluation and case-control study. J. Med. Internet Res. 23(11), Manuscript e28946 (2021)","DOI":"10.2196\/28946"},{"key":"3_CR17","volume-title":"Deep Learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. MIT Press, Cambridge (2016)"},{"key":"3_CR18","unstructured":"Gruetzemacher, R.: The power of natural language processing (2022). https:\/\/hbr.org\/2022\/04\/the-power-of-natural-language-processing. Accessed 06 May 2022"},{"issue":"1","key":"3_CR19","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1148\/radiology.143.1.7063747","volume":"143","author":"JA Hanley","year":"1982","unstructured":"Hanley, J.A., McNeil, B.J.: The meaning and use of the area under a receiver operating characteristic (ROC) curve. Radiology 143(1), 29\u201336 (1982)","journal-title":"Radiology"},{"key":"3_CR20","volume-title":"An Introduction to Statistical Learning","author":"T Hastie","year":"2009","unstructured":"Hastie, T., Tibshirani, R., Friedman, J.: An Introduction to Statistical Learning, vol. 1. Springer, New York (2009)"},{"key":"3_CR21","unstructured":"Honnibal, M., Montani, I.: spaCy 2: natural language understanding with Bloom embeddings, convolutional neural networks and incremental parsing (2017). https:\/\/spacy.io\/. Accessed 03 June 2022"},{"issue":"8","key":"3_CR22","first-page":"34","volume":"3","author":"Z Huang","year":"1997","unstructured":"Huang, Z.: A fast clustering algorithm to cluster very large categorical data sets in data mining. Data Min. Knowl. Disc. 3(8), 34\u201339 (1997)","journal-title":"Data Min. Knowl. Disc."},{"issue":"3","key":"3_CR23","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1023\/A:1009769707641","volume":"2","author":"Z Huang","year":"1998","unstructured":"Huang, Z.: Extensions to the k-means algorithm for clustering large data sets with categorical values. Data Min. Knowl. Disc. 2(3), 283\u2013304 (1998)","journal-title":"Data Min. Knowl. Disc."},{"key":"3_CR24","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/BF01908075","volume":"2","author":"L Hubert","year":"1985","unstructured":"Hubert, L., Arabie, P.: Comparing partitions. J. Classif. 2, 193\u2013218 (1985)","journal-title":"J. Classif."},{"issue":"6245","key":"3_CR25","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1126\/science.aaa8415","volume":"349","author":"MI Jordan","year":"2015","unstructured":"Jordan, M.I., Mitchell, T.M.: Machine learning: trends, perspectives, and prospects. Science 349(6245), 255\u2013260 (2015)","journal-title":"Science"},{"key":"3_CR26","volume-title":"Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition","author":"D Jurafsky","year":"2008","unstructured":"Jurafsky, D., Martin, J.: Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition, vol. 2. Pearson Education, Upper Saddle River (2008)"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Kanada, Y.: A vectorization technique of hashing and its application to several sorting algorithms. In: Proceedings of the 1990 International Conference on Databases, Parallel Architectures, and Their Applications, pp. 147\u2013151. Miami Beach (FL) (1990)","DOI":"10.1109\/PARBSE.1990.77135"},{"key":"3_CR28","volume-title":"Finding Groups in Data: An Introduction to Cluster Analysis","author":"L Kaufman","year":"2009","unstructured":"Kaufman, L., Rousseeuw, P.J.: Finding Groups in Data: An Introduction to Cluster Analysis. Wiley, New York (2009)"},{"key":"3_CR29","unstructured":"Kazmaier, J.: A framework for evaluating unstructured text data using sentiment analysis. Ph.D. dissertation, Stellenbosch University, Stellenbosch (2020)"},{"key":"3_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12911-021-01665-w","volume":"21","author":"T Khaleghi","year":"2021","unstructured":"Khaleghi, T., Murat, A., Arslanturk, S.: A tree based approach for multi-class classification of surgical procedures using structured and unstructured data. BMC Med. Inform. Decis. Mak. 21, 1\u201312 (2021)","journal-title":"BMC Med. Inform. Decis. Mak."},{"key":"3_CR31","unstructured":"King, M.A.: Ensemble learning techniques for structured and unstructured data. Ph.D. dissertation, Virginia Polytechnic Institute and State University, Blacksburg (VA) (2015)"},{"key":"3_CR32","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-6849-3","volume-title":"Applied Predictive Modeling","author":"M Kuhn","year":"2013","unstructured":"Kuhn, M., Johnson, K.: Applied Predictive Modeling. Springer, New York (2013)"},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Lichtnwalter, R., Chawla, N.V.: Link prediction: fair and effective evaluation. In: Proceedings of the 2012 IEEE\/ACM International Conference on Advances in Social Networks Analysis and Mining, pp. 376\u2013383. Istanbul (2012)","DOI":"10.1109\/ASONAM.2012.68"},{"key":"3_CR34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-02145-9","volume-title":"Sentiment Analysis and Opinion Mining","author":"B Liu","year":"2012","unstructured":"Liu, B.: Sentiment Analysis and Opinion Mining. Morgan & Claypool, San Rafael (2012)"},{"key":"3_CR35","unstructured":"Loper, E., Bird, S.: NLTK: the natural language toolkit. In: Proceedings of the Association for Computational Linguistics Interactive Poster and Demonstration Sessions, pp. 214\u2013217. Barcelona (2004)"},{"issue":"4","key":"3_CR36","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1147\/rd.14.0309","volume":"1","author":"HP Luhn","year":"1957","unstructured":"Luhn, H.P.: A statistical approach to mechanized encoding and searching of literary information. IBM J. Res. Dev. 1(4), 309\u2013317 (1957)","journal-title":"IBM J. Res. Dev."},{"key":"3_CR37","unstructured":"Marr, B.: What\u2019s the difference between structured, semi-structured and unstructured data? (2019). https:\/\/www.forbes.com\/sites\/bernardmarr\/2019\/10\/18\/whats-the-difference-between-structured-semi-structured-and-unstructured-data\/?sh=66574ff52b4d. Accessed 25 Oct 2022"},{"key":"3_CR38","unstructured":"McInnes, L., Healy, J., Melville, J.: UMAP: uniform manifold approximation and projection for dimension reduction (2018). https:\/\/arxiv.org\/abs\/1802.03426. Accessed 01 Mar 2023"},{"key":"3_CR39","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. Poster at the 2013 International Conference on Learning Representations, Scottsdale (AZ) (2013)"},{"issue":"17","key":"3_CR40","doi-asserted-by":"publisher","first-page":"2729","DOI":"10.1002\/sim.1840","volume":"23","author":"S Ng","year":"2004","unstructured":"Ng, S., McLachlan, G., Yau, K.K., Lee, A.H.: Modelling the distribution of ischaemic stroke-specific survival time using an EM-based mixture approach with random effects adjustment. Stat. Med. 23(17), 2729\u20132744 (2004)","journal-title":"Stat. Med."},{"key":"3_CR41","volume-title":"Handbook of Statistical Analysis and Data Mining Applications","author":"R Nisbet","year":"2009","unstructured":"Nisbet, R., Elder, J., Miner, G.D.: Handbook of Statistical Analysis and Data Mining Applications. Academic Press, Orlando (2009)"},{"issue":"4","key":"3_CR42","first-page":"93","volume":"52","author":"J Pag\u00e8s","year":"2004","unstructured":"Pag\u00e8s, J.: Analyse factorielle de donnees mixtes: Principe et exemple d\u2019application. Revue de Statistique Appliqu\u00e9e 52(4), 93\u2013111 (2004)","journal-title":"Revue de Statistique Appliqu\u00e9e"},{"key":"3_CR43","doi-asserted-by":"crossref","unstructured":"Pang, B., Lee, L.: Opinion mining and sentiment analysis. Found. Trends\u00ae Inf. Retrieval 2(1\u20132), 1\u2013135 (2008)","DOI":"10.1561\/1500000011"},{"issue":"85","key":"3_CR44","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., et al.: \u00c9douard Duchesnay: Scikit-learn: machine learning in Python. J. Mach. Learn. Res. 12(85), 2825\u20132830 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"3_CR45","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.D.: Glove: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing, pp. 1532\u20131543. Doha (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"3_CR46","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative pre-training. Technical report, OpenAI, San Francisco (CA) (2018)"},{"key":"3_CR47","unstructured":"Ramos, J.: Using TF-IDF to determine word relevance in document queries. In: Proceedings of the 1st Instructional Conference on Machine Learning, vol.\u00a0242, pp. 29\u201348. Los Angeles, CA (2003)"},{"key":"3_CR48","unstructured":"Roberts, E.: Natural language processing: history (2004). https:\/\/cs.stanford.edu\/people\/eroberts\/courses\/soco\/projects\/2004-05\/nlp\/overview_history.html. Accessed 03 Dec 2022"},{"key":"3_CR49","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/0377-0427(87)90125-7","volume":"20","author":"PJ Rousseeuw","year":"1987","unstructured":"Rousseeuw, P.J.: Silhouettes: a graphical aid to the interpretation and validation of cluster analysis. J. Comput. Appl. Math. 20, 53\u201365 (1987)","journal-title":"J. Comput. Appl. Math."},{"key":"3_CR50","unstructured":"Schapire, R.E.: A brief introduction to boosting. In: Proceedings of the 16th International Joint Conference on Artificial Intelligence, pp. 1401\u20131406. Stockholm (1999)"},{"key":"3_CR51","volume-title":"Introduction to Information Retrieval","author":"H Sch\u00fctze","year":"2008","unstructured":"Sch\u00fctze, H., Manning, C.D., Raghavan, P.: Introduction to Information Retrieval, vol. 39. Cambridge University Press, New York (2008)"},{"issue":"1","key":"3_CR52","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/505282.505283","volume":"34","author":"F Sebastiani","year":"2002","unstructured":"Sebastiani, F.: Machine learning in automated text categorization. ACM Comput. Surv. 34(1), 1\u201347 (2002)","journal-title":"ACM Comput. Surv."},{"key":"3_CR53","unstructured":"Shah, T.: About train, validation and test sets in machine learning (2017). https:\/\/towardsdatascience.com\/train-validation-and-test-sets-72cb40cba9e7. Accessed 02 May 2022"},{"key":"3_CR54","doi-asserted-by":"crossref","unstructured":"Strekalova, Y.A., Bouakkaz, M.: Semi-structured Data, pp.\u00a01\u20133. Springer, Cham (2017)","DOI":"10.1007\/978-3-319-32001-4_183-1"},{"key":"3_CR55","doi-asserted-by":"crossref","unstructured":"Sun, C., Qiu, X., Xu, Y., Huang, X.: How to fine-tune BERT for text classification? In: Proceedings of the 18th China National Conference: Chinese Computational Linguistics, pp. 194\u2013206. Kunming (2019)","DOI":"10.1007\/978-3-030-32381-3_16"},{"key":"3_CR56","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1016\/j.inffus.2022.07.019","volume":"88","author":"N Tang","year":"2022","unstructured":"Tang, N., et al.: Improving the performance of lung nodule classification by fusing structured and unstructured data. Inf. Fusion 88, 161\u2013174 (2022)","journal-title":"Inf. Fusion"},{"key":"3_CR57","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of the 31st Conference on Neural Information Processing Systems, vol.\u00a030, pp. 1\u201311. Long Beach, CA (2017)"},{"key":"3_CR58","doi-asserted-by":"crossref","unstructured":"Van\u00a0de Velden, M., Iodice\u00a0D\u2019Enza, A., Markos, A.: Distance-based clustering of mixed data. Wiley Interdisc. Rev. Comput. Stat. 11(3), Manuscript e1456 (2019)","DOI":"10.1002\/wics.1456"},{"key":"3_CR59","doi-asserted-by":"crossref","unstructured":"Weinberger, K., Dasgupta, A., Langford, J., Smola, A., Attenberg, J.: Feature hashing for large scale multitask learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, pp. 1113\u20131120. Montreal (2009)","DOI":"10.1145\/1553374.1553516"},{"key":"3_CR60","doi-asserted-by":"crossref","unstructured":"Zhang, L., Wang, S., Liu, B.: Deep learning for sentiment analysis: a survey. Wiley Interdisc. Rev. Data Min. Knowl. Discov. 8(4), Manuscript e1253 (2018)","DOI":"10.1002\/widm.1253"}],"container-title":["Lecture Notes in Computer Science","Dynamics of Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-81010-7_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T04:52:44Z","timestamp":1740459164000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-81010-7_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031810091","9783031810107"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-81010-7_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"26 February 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on the Dynamics of Information Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kalamata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 June 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 June 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dis22024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/dis2024.ujep.cz\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}