{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T12:22:50Z","timestamp":1763295770181,"version":"3.44.0"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030863333"},{"type":"electronic","value":"9783030863340"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86334-0_15","type":"book-chapter","created":{"date-parts":[[2021,9,3]],"date-time":"2021-09-03T20:16:02Z","timestamp":1630700162000},"page":"223-237","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Multimodal Attention-Based Learning for Imbalanced Corporate Documents Classification"],"prefix":"10.1007","author":[{"given":"Ibrahim Souleiman","family":"Mahamoud","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joris","family":"Voerman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Micka\u00ebl","family":"Coustaty","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aur\u00e9lie","family":"Joseph","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vincent Poulain","family":"d\u2019Andecy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jean-Marc","family":"Ogier","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,9,2]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Schuster, D., et al.: Intellix-end-user trained information extraction for document archiving. In: 2013 12th International Conference on Document Analysis and Recognition. IEEE, pp. 101\u2013105 (2013)","DOI":"10.1109\/ICDAR.2013.28"},{"key":"15_CR2","unstructured":"Srivastava, N., Salakhutdinov, R.R.: Multimodal learning with deep Boltzmann machines. In: Pereira, F., Burges, C.J.C., Bottou, L., Weinberger, K.Q. (eds.) Advances in Neural Information Processing Systems, vol. 25, pp. 2222\u20132230. Curran Associates Inc. (2012). http:\/\/papers.nips.cc\/paper\/4683-multimodal-learning-with-deep-boltzmann-machines.pdf"},{"key":"15_CR3","doi-asserted-by":"crossref","unstructured":"Bakkali, S., Ming, Z., Coustaty, M., Rusinol, M.: Visual and textual deep feature fusion for document image classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 562\u2013563 (2020)","DOI":"10.1109\/CVPRW50498.2020.00289"},{"key":"15_CR4","doi-asserted-by":"crossref","unstructured":"Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classification and retrieval. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR), pp. 991\u2013995. IEEE (2015)","DOI":"10.1109\/ICDAR.2015.7333910"},{"issue":"3","key":"15_CR5","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"0 Russakovsky","year":"2015","unstructured":"Russakovsky, 0, et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vis. 115(3), 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vis."},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V., Alemi, A.A.: Inception-v4, inception-resnet and the impact of residual connections on learning. In: Thirty-First AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"15_CR7","doi-asserted-by":"crossref","unstructured":"Zoph, B., Vasudevan, V., Shlens, J., Le, Q.V.: Learning transferable architectures for scalable image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8697\u20138710 (2018)","DOI":"10.1109\/CVPR.2018.00907"},{"key":"15_CR8","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"15_CR9","unstructured":"Joulin, A., Grave, E., Bojanowski, P., Douze, M., J\u00e9gou, H., Mikolov, T.: Fasttext.zip: compressing text classification models, arXiv preprint arXiv:1612.03651 (2016)"},{"key":"15_CR10","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding, arXiv preprint arXiv:1810.04805 (2018)"},{"key":"15_CR11","doi-asserted-by":"crossref","unstructured":"Graves, A., Mohamed, A.-R., Hinton, G.: Speech recognition with deep recurrent neural networks. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6645\u20136649. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"15_CR12","unstructured":"Zhou, P., Qi, Z., Zheng, S., Xu, J., Bao, H., Xu, B.: Text classification improved by integrating bidirectional LSTM with two-dimensional max pooling, arXiv preprint arXiv:1611.06639 (2016)"},{"key":"15_CR13","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"427","DOI":"10.1007\/978-3-030-43823-4_35","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"N Audebert","year":"2020","unstructured":"Audebert, N., Herold, C., Slimani, K., Vidal, C.: Multimodal deep networks for text and image-based document classification. In: Cellier, P., Driessens, K. (eds.) ECML PKDD 2019. CCIS, vol. 1167, pp. 427\u2013443. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-43823-4_35"},{"key":"15_CR14","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.-C.: MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Kim, Y.: Convolutional neural networks for sentence classification, arXiv preprint arXiv:1408.5882 (2014)","DOI":"10.3115\/v1\/D14-1181"},{"key":"15_CR16","unstructured":"Luong, M.-T., Manning, C.D.: Stanford neural machine translation systems for spoken language domains. In: Proceedings of the International Workshop on Spoken Language Translation, pp. 76\u201379 (2015)"},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"Jain, R., Wigington, C.: Multimodal document image classification. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 71\u201377. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00021"},{"key":"15_CR18","unstructured":"G\u00f3rriz, M., Antony, J., McGuinness, K., Gir\u00f3-i Nieto, X., O\u2019Connor, N.E.: Assessing knee OA severity with CNN attention-based end-to-end architectures, arXiv preprint arXiv:1908.08856 (2019)"},{"key":"15_CR19","unstructured":"Jetley, S., Lord, N.A., Lee, N., Torr, P.H.: Learn to pay attention, arXiv preprint arXiv:1804.02391 (2018)"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Xian, Y., Schiele, B., Akata, Z.: Zero-shot learning-the good, the bad and the ugly. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4582\u20134591 (2017)","DOI":"10.1109\/CVPR.2017.328"},{"issue":"6266","key":"15_CR21","doi-asserted-by":"publisher","first-page":"1332","DOI":"10.1126\/science.aab3050","volume":"350","author":"BM Lake","year":"2015","unstructured":"Lake, B.M., Salakhutdinov, R., Tenenbaum, J.B.: Human-level concept learning through probabilistic program induction. Science 350(6266), 1332\u20131338 (2015)","journal-title":"Science"},{"key":"15_CR22","unstructured":"Santoro, A., Bartunov, S., Botvinick, M., Wierstra, D., Lillicrap, T.: One-shot learning with memory-augmented neural networks, arXiv preprint arXiv:1605.06065 (2016)"},{"key":"15_CR23","unstructured":"Koch, G., Zemel, R., Salakhutdinov, R.: Siamese neural networks for one-shot image recognition. In: ICML Deep Learning Workshop, vol. 2. Lille (2015)"},{"issue":"8","key":"15_CR24","doi-asserted-by":"publisher","first-page":"2488","DOI":"10.1007\/s10489-020-01637-z","volume":"50","author":"E Lin","year":"2020","unstructured":"Lin, E., Chen, Q., Qi, X.: Deep reinforcement learning for imbalanced classification. Appl. Intell. 50(8), 2488\u20132502 (2020). https:\/\/doi.org\/10.1007\/s10489-020-01637-z","journal-title":"Appl. Intell."},{"key":"15_CR25","doi-asserted-by":"crossref","unstructured":"Martin, L.: CamemBERT: a tasty French language model. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.acl-main.645"},{"key":"15_CR26","unstructured":"Cheng, H., Zhou, J.T., Tay, W.P., Wen, B.: Attentive graph neural networks for few-shot learning (2020)"},{"key":"15_CR27","unstructured":"Nasr, G.E., Badr, E.A., Joun, C.: Cross entropy error function in neural networks: forecasting gasoline demand. In: Applied Intelligence, pp. 1\u201315 (2002)"},{"key":"15_CR28","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization (2017)"},{"key":"15_CR29","doi-asserted-by":"crossref","unstructured":"Das, A., Roy, S., Bhattacharya, U., Parui, S.K.: Document image classification with intra-domain transfer learning and stacked generalization of deep convolutional neural networks (2018)","DOI":"10.1109\/ICPR.2018.8545630"},{"key":"15_CR30","unstructured":"Lin, E., Chen, Q., Qi, X.: Deep reinforcement learning for imbalanced classification (2019)"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2021"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86334-0_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T22:05:20Z","timestamp":1756850720000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86334-0_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030863333","9783030863340"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86334-0_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"2 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lausanne","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iapr.org\/icdar2021","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"340","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"182","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.9","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Additionally, 13 competition reports are included.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}