{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T04:37:21Z","timestamp":1776746241646,"version":"3.51.2"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032052803","type":"print"},{"value":"9783032052810","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T00:00:00Z","timestamp":1758153600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T00:00:00Z","timestamp":1758153600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-05281-0_18","type":"book-chapter","created":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T12:48:38Z","timestamp":1758199718000},"page":"272-279","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Vector Representations of\u00a0Multi-modal Data"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4060-3431","authenticated-orcid":false,"given":"Toni","family":"Taipalus","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2067-454X","authenticated-orcid":false,"given":"Jiaheng","family":"Lu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,18]]},"reference":[{"key":"18_CR1","doi-asserted-by":"publisher","unstructured":"Cheng, D., Liu, M.: CNNs based multi-modality classification for ad diagnosis. In: 2017 10th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI), pp.\u00a01\u20135 (2017). https:\/\/doi.org\/10.1109\/CISP-BMEI.2017.8302281","DOI":"10.1109\/CISP-BMEI.2017.8302281"},{"key":"18_CR2","doi-asserted-by":"publisher","unstructured":"Chung, Y., Glass, J.R.: Speech2vec: A sequence-to-sequence framework for learning word embeddings from speech. In: Yegnanarayana, B. (ed.) 19th Annual Conference of the International Speech Communication Association. pp. 811\u2013815. ISCA (2018https:\/\/doi.org\/10.21437\/INTERSPEECH.2018-2341","DOI":"10.21437\/INTERSPEECH.2018-2341"},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"Chung, Y.A., Weng, W.H., Tong, S., Glass, J.: Towards unsupervised speech-to-text translation. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7170\u20137174. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8683550"},{"key":"18_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109272","volume":"137","author":"Y Gong","year":"2023","unstructured":"Gong, Y., Cosma, G.: Improving visual-semantic embeddings by learning semantically-enhanced hard negatives for cross-modal information retrieval. Pattern Recogn. 137, 109272 (2023). https:\/\/doi.org\/10.1016\/j.patcog.2022.109272","journal-title":"Pattern Recogn."},{"issue":"8","key":"18_CR5","doi-asserted-by":"publisher","first-page":"3062","DOI":"10.1109\/TKDE.2019.2961343","volume":"33","author":"S Guo","year":"2021","unstructured":"Guo, S., Yao, N.: Document vector extension for documents classification. IEEE Trans. Knowl. Data Eng. 33(8), 3062\u20133074 (2021). https:\/\/doi.org\/10.1109\/TKDE.2019.2961343","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"18_CR6","unstructured":"Herdade, S., Kappeler, A., Boakye, K., Soares, J.: Image captioning: transforming objects into words. Adv. Neural Inf. Processing Syst. 32 (2019)"},{"issue":"3","key":"18_CR7","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/tbdata.2019.2921572","volume":"7","author":"J Johnson","year":"2021","unstructured":"Johnson, J., Douze, M., Jegou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2021). https:\/\/doi.org\/10.1109\/tbdata.2019.2921572","journal-title":"IEEE Trans. Big Data"},{"key":"18_CR8","unstructured":"Kazemi, S.M., et al.: Time2vec: Learning a vector representation of time (2019). https:\/\/arxiv.org\/abs\/1907.05321"},{"issue":"1","key":"18_CR9","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1109\/TSA.2005.853206","volume":"14","author":"T Kinnunen","year":"2006","unstructured":"Kinnunen, T., Karpov, E., Franti, P.: Real-time speaker identification and verification. IEEE Trans. Audio Speech Lang. Process. 14(1), 277\u2013288 (2006). https:\/\/doi.org\/10.1109\/TSA.2005.853206","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"18_CR10","doi-asserted-by":"publisher","unstructured":"Ko, K., Park, S., Ko, H.: Convolutional feature vectors and support vector machine for animal sound classification. In: 2018 40th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC), pp. 376\u2013379 (2018). https:\/\/doi.org\/10.1109\/EMBC.2018.8512408","DOI":"10.1109\/EMBC.2018.8512408"},{"key":"18_CR11","doi-asserted-by":"publisher","unstructured":"Kriegel, H.P., Brecheisen, S., Kr\u00f6ger, P., Pfeifle, M., Schubert, M.: Using sets of feature vectors for similarity search on voxelized CAD objects. In: Proceedings of the 2003 ACM SIGMOD International Conference on Management of Data, pp. 587\u2013598. SIGMOD \u201903, ACM (2003). https:\/\/doi.org\/10.1145\/872757.872828","DOI":"10.1145\/872757.872828"},{"key":"18_CR12","unstructured":"Lewis, M.: Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension (2019). https:\/\/arxiv.org\/abs\/1910.13461"},{"key":"18_CR13","doi-asserted-by":"publisher","unstructured":"Lu, J., Holubov\u00e1, I.: Multi-model databases: A new journey to handle the variety of data. ACM Comput. Surv. 52(3) (2019). https:\/\/doi.org\/10.1145\/3323214","DOI":"10.1145\/3323214"},{"key":"18_CR14","unstructured":"Lu, Y., Zhao, W., Sun, N., Wang, J.: Enhancing multimodal knowledge graph representation learning through triple contrastive learning. In: Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence, pp. 5963\u20135971. ijcai.org (2024)"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Pan, J.J., Wang, J., Li, G.: Vector database management techniques and systems. In: Companion of the 2024 International Conference on Management of Data, pp. 597\u2013604. ACM (2024)","DOI":"10.1145\/3626246.3654691"},{"key":"18_CR16","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1016\/S0020-0255(02)00400-0","volume":"151","author":"P Perrin","year":"2003","unstructured":"Perrin, P., Petry, F.E.: Extraction and representation of contextual information for knowledge discovery in texts. Inf. Sci. 151, 125\u2013152 (2003)","journal-title":"Inf. Sci."},{"key":"18_CR17","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International conference on machine learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"18_CR18","doi-asserted-by":"publisher","unstructured":"Singh, S.P., Kumar, A., Darbari, H., Singh, L., Rastogi, A., Jain, S.: Machine translation using deep learning: An overview. In: 2017 International Conference on Computer, Communications and Electronics (Comptelix), pp. 162\u2013167 (2017). https:\/\/doi.org\/10.1109\/COMPTELIX.2017.8003957","DOI":"10.1109\/COMPTELIX.2017.8003957"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Sun, C., Myers, A., Vondrick, C., Murphy, K., Schmid, C.: VideoBERT: A joint model for video and language representation learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7464\u20137473 (2019)","DOI":"10.1109\/ICCV.2019.00756"},{"key":"18_CR20","doi-asserted-by":"publisher","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V., Alemi, A.A.: Inception-v4, Inception-ResNet and the impact of residual connections on learning. In: Singh, S., Markovitch, S. (eds.) Proceedings of the 31st AAAI Conference on Artificial Intelligence, pp. 4278\u20134284. AAAI Press (2017). https:\/\/doi.org\/10.1609\/AAAI.V31I1.11231","DOI":"10.1609\/AAAI.V31I1.11231"},{"key":"18_CR21","doi-asserted-by":"publisher","unstructured":"Taipalus, T.: Vector database management systems: Fundamental concepts, use-cases, and current challenges. Cogn. Syst. Res. 85, 101216 (2024). https:\/\/doi.org\/10.1016\/j.cogsys.2024.101216","DOI":"10.1016\/j.cogsys.2024.101216"},{"key":"18_CR22","unstructured":"Tan, M., Le, Q.V.: Efficientnet: Rethinking model scaling for convolutional neural networks. In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning. vol.\u00a097. PMLR (2019)"},{"key":"18_CR23","doi-asserted-by":"publisher","unstructured":"Truong, Q.T., Salah, A., Lauw, H.: Multi-modal recommender systems: Hands-on exploration. In: Proceedings of the 15th ACM Conference on Recommender Systems, pp. 834\u2013837. RecSys \u201921, ACM (2021). https:\/\/doi.org\/10.1145\/3460231.3473324","DOI":"10.1145\/3460231.3473324"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Yuan, X., et al.: Multimodal contrastive training for visual representation learning. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 6995\u20137004. Computer Vision Foundation \/ IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.00692"},{"key":"18_CR25","doi-asserted-by":"crossref","unstructured":"Zhang, J., Yin, Z., Chen, P., Nichele, S.: Emotion recognition using multi-modal data and machine learning techniques: A tutorial and review. Inf. Fusion 59, 103\u2013126 (2020). https:\/\/doi.org\/10.1016\/j.inffus.2020.01.011","DOI":"10.1016\/j.inffus.2020.01.011"},{"key":"18_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Sidib\u00e9, D., Morel, O., M\u00e9riaudeau, F.: Deep multimodal fusion for semantic image segmentation: A survey. Image Vision Comput. 105, 104042 (2021). https:\/\/doi.org\/10.1016\/j.imavis.2020.104042","DOI":"10.1016\/j.imavis.2020.104042"}],"container-title":["Lecture Notes in Computer Science","Advances in Databases and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-05281-0_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T03:46:18Z","timestamp":1776743178000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-05281-0_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,18]]},"ISBN":["9783032052803","9783032052810"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-05281-0_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,18]]},"assertion":[{"value":"18 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ADBIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Advances in Databases and Information Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tampere","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Finland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"adbis2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/adbis2025.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}