{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T15:42:05Z","timestamp":1758123725572,"version":"3.41.0"},"publisher-location":"Cham","reference-count":72,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031705519"},{"type":"electronic","value":"9783031705526"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70552-6_12","type":"book-chapter","created":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T04:02:14Z","timestamp":1725940934000},"page":"199-218","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Privacy-Aware Document Visual Question Answering"],"prefix":"10.1007","author":[{"given":"Rub\u00e8n","family":"Tito","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Khanh","family":"Nguyen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marlon","family":"Tobaben","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Raouf","family":"Kerkouche","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohamed Ali","family":"Souibgui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kangsoo","family":"Jung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joonas","family":"J\u00e4lk\u00f6","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vincent Poulain","family":"D\u2019Andecy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aurelie","family":"Joseph","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Kang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ernest","family":"Valveny","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Antti","family":"Honkela","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mario","family":"Fritz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dimosthenis","family":"Karatzas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,11]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Abadi, M., et al.: Deep learning with differential privacy. In: Proceedings of the 2016 ACM SIGSAC Conference on Computer and Communications Security, pp. 308\u2013318 (2016)","DOI":"10.1145\/2976749.2978318"},{"key":"12_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1007\/978-3-642-24178-9_9","volume-title":"Information Hiding","author":"G \u00c1cs","year":"2011","unstructured":"\u00c1cs, G., Castelluccia, C.: I have a DREAM! (DiffeRentially privatE smArt Metering). In: Filler, T., Pevn\u00fd, T., Craver, S., Ker, A. (eds.) IH 2011. LNCS, vol. 6958, pp. 118\u2013132. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-24178-9_9"},{"key":"12_CR3","unstructured":"Amazon: Amazon textract (2021). https:\/\/aws.amazon.com\/textract\/. Accessed 10 Oct 2023"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Biten, A.F., et al.: ICDAR 2019 competition on scene text visual question answering. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1563\u20131570. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00251"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Biten, A.F., et al.: Scene text visual question answering. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4291\u20134301 (2019)","DOI":"10.1109\/ICCV.2019.00439"},{"key":"12_CR6","unstructured":"Bonawitz, K., et al.: Practical secure aggregation for federated learning on user-held data. arXiv preprint arXiv:1611.04482 (2016)"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Carlini, N., Chien, S., Nasr, M., Song, S., Terzis, A., Tramer, F.: Membership inference attacks from first principles. In: 2022 IEEE Symposium on Security and Privacy (SP), pp. 1897\u20131914. IEEE (2022)","DOI":"10.1109\/SP46214.2022.9833649"},{"key":"12_CR8","unstructured":"Carlini, N., Ippolito, D., Jagielski, M., Lee, K., Tramer, F., Zhang, C.: Quantifying memorization across neural language models. arXiv preprint arXiv:2202.07646 (2022)"},{"key":"12_CR9","doi-asserted-by":"publisher","unstructured":"Cattan, Y., Choquette-Choo, C.A., Papernot, N., Thakurta, A.: Fine-tuning with differential privacy necessitates an additional hyperparameter search. CoRR abs\/2210.02156 (2022). https:\/\/doi.org\/10.48550\/arXiv.2210.02156. https:\/\/doi.org\/10.48550\/arXiv.2210.02156","DOI":"10.48550\/arXiv.2210.02156"},{"key":"12_CR10","doi-asserted-by":"publisher","unstructured":"De, S., Berrada, L., Hayes, J., Smith, S.L., Balle, B.: Unlocking high-accuracy differentially private image classification through scale. CoRR abs\/2204.13650 (2022). https:\/\/doi.org\/10.48550\/arXiv.2204.13650. https:\/\/doi.org\/10.48550\/arXiv.2204.13650","DOI":"10.48550\/arXiv.2204.13650"},{"key":"12_CR11","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186 (2019)"},{"key":"12_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"486","DOI":"10.1007\/11761679_29","volume-title":"Advances in Cryptology - EUROCRYPT 2006","author":"C Dwork","year":"2006","unstructured":"Dwork, C., Kenthapadi, K., McSherry, F., Mironov, I., Naor, M.: Our data, ourselves: privacy via distributed noise generation. In: Vaudenay, S. (ed.) EUROCRYPT 2006. LNCS, vol. 4004, pp. 486\u2013503. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11761679_29"},{"key":"12_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1007\/11681878_14","volume-title":"Theory of Cryptography","author":"C Dwork","year":"2006","unstructured":"Dwork, C., McSherry, F., Nissim, K., Smith, A.: Calibrating noise to sensitivity in private data analysis. In: Halevi, S., Rabin, T. (eds.) TCC 2006. LNCS, vol. 3876, pp. 265\u2013284. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11681878_14"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Dwork, C., Roth, A.: The algorithmic foundations of differential privacy. Found. Trends\u00ae Theor. Comput. Sci. 9(3\u20134), 211\u2013407 (2014)","DOI":"10.1561\/0400000042"},{"key":"12_CR15","unstructured":"Fu, C., et al.: Label inference attacks against vertical federated learning. In: 31st USENIX Security Symposium (USENIX Security 2022), Boston, MA. USENIX Association (2022)"},{"key":"12_CR16","doi-asserted-by":"crossref","unstructured":"Galli, F., Biswas, S., Jung, K., Cucinotta, T., Palamidessi, C.: Group privacy for personalized federated learning. In: Proceedings of the 9th International Conference on Information Systems Security and Privacy. SCITEPRESS - Science and Technology Publications (2023)","DOI":"10.5220\/0011885000003405"},{"key":"12_CR17","unstructured":"Geiping, J., Bauermeister, H., Dr\u00f6ge, H., Moeller, M.: Inverting gradients - how easy is it to break privacy in federated learning? In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol.\u00a033, pp. 16937\u201316947. Curran Associates, Inc. (2020)"},{"key":"12_CR18","unstructured":"Gopi, S., Lee, Y.T., Wutschitz, L.: Numerical composition of differential privacy. In: Ranzato, M., Beygelzimer, A., Dauphin, Y.N., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, 6\u201314 December 2021, virtual, pp. 11631\u201311642 (2021). https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/6097d8f3714205740f30debe1166744e-Abstract.html"},{"key":"12_CR19","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning, ICML 2019, 9\u201315 June 2019, Long Beach, California, USA. Proceedings of Machine Learning Research, vol.\u00a097, pp. 2790\u20132799. PMLR (2019). http:\/\/proceedings.mlr.press\/v97\/houlsby19a.html"},{"key":"12_CR20","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. In: The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, 25\u201329 April 2022. OpenReview.net (2022). https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"12_CR21","unstructured":"Hu, P., Wang, Z., Sun, R., Wang, H., Xue, M.: M$$^{4}$$i: multi-modal models membership inference. In: Advances in Neural Information Processing Systems, vol. 35, pp. 1867\u20131882 (2022)"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Huang, Y., Lv, T., Cui, L., Lu, Y., Wei, F.: Layoutlmv3: pre-training for document AI with unified text and image masking. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 4083\u20134091 (2022)","DOI":"10.1145\/3503161.3548112"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Ippolito, D., et al.: Preventing verbatim memorization in language models gives a false sense of privacy. arXiv preprint arXiv:2210.17546 (2022)","DOI":"10.18653\/v1\/2023.inlg-main.3"},{"key":"12_CR24","unstructured":"Kerkouche, R., \u00c1cs, G., Castelluccia, C., Genev\u00e8s, P.: Constrained differentially private federated learning for low-bandwidth devices. In: de\u00a0Campos, C., Maathuis, M.H. (eds.) Proceedings of the Thirty-Seventh Conference on Uncertainty in Artificial Intelligence. Proceedings of Machine Learning Research, vol.\u00a0161, pp. 1756\u20131765. PMLR (2021)"},{"key":"12_CR25","doi-asserted-by":"publisher","unstructured":"Kerkouche, R., \u00c1cs, G., Castelluccia, C., Genev\u00e8s, P.: Compression boosts differentially private federated learning. In: 2021 IEEE European Symposium on Security and Privacy (EuroS &P), pp. 304\u2013318 (2021). https:\/\/doi.org\/10.1109\/EuroSP51992.2021.00029","DOI":"10.1109\/EuroSP51992.2021.00029"},{"key":"12_CR26","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1007\/978-3-031-19815-1_29","volume-title":"Computer Vision - ECCV 2022","author":"G Kim","year":"2022","unstructured":"Kim, G., et al.: OCR-free document understanding transformer. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13688, pp. 498\u2013517. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19815-1_29"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Ko, M., Jin, M., Wang, C., Jia, R.: Practical membership inference attacks against large-scale multi-modal models: a pilot study. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4871\u20134881 (2023)","DOI":"10.1109\/ICCV51070.2023.00449"},{"key":"12_CR28","unstructured":"Koskela, A., J\u00e4lk\u00f6, J., Honkela, A.: Computing tight differential privacy guarantees using FFT. In: The 23rd International Conference on Artificial Intelligence and Statistics, (AISTATS 2020). Proceedings of Machine Learning Research, vol.\u00a0108, pp. 2560\u20132569. PMLR (2020). http:\/\/proceedings.mlr.press\/v108\/koskela20b.html"},{"key":"12_CR29","unstructured":"Koskela, A., J\u00e4lk\u00f6, J., Prediger, L., Honkela, A.: Tight differential privacy for discrete-valued mechanisms and for the subsampled Gaussian mechanism using FFT. In: The 24th International Conference on Artificial Intelligence and Statistics, (AISTATS 2021). Proceedings of Machine Learning Research, vol.\u00a0130, pp. 3358\u20133366. PMLR (2021). http:\/\/proceedings.mlr.press\/v130\/koskela21a.html"},{"key":"12_CR30","unstructured":"Kurakin, A., Chien, S., Song, S., Geambasu, R., Terzis, A., Thakurta, A.: Toward Training at ImageNet Scale with Differential Privacy. arXiv preprint abs\/2201.12328 (2022). https:\/\/arxiv.org\/abs\/2201.12328"},{"key":"12_CR31","unstructured":"Lee, K., et al.: Pix2struct: screenshot parsing as pretraining for visual language understanding. In: Proceedings of the 40th International Conference on Machine Learning. ICML 2023. JMLR.org (2023)"},{"key":"12_CR32","doi-asserted-by":"crossref","unstructured":"Li, J., et\u00a0al.: DiT: self-supervised pre-training for document image transformer. In: ACMMM, pp. 3530\u20133539 (2022)","DOI":"10.1145\/3503161.3547911"},{"key":"12_CR33","unstructured":"Li, O., et al.: Label leakage and protection in two-party split learning. In: NeurIPS 2020 Workshop on Scalability, Privacy, and Security in Federated Learning (SpicyFL) (2020)"},{"key":"12_CR34","unstructured":"Li, X., Tram\u00e8r, F., Liang, P., Hashimoto, T.: Large language models can be strong differentially private learners. In: The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, 25\u201329 April 2022. OpenReview.net (2022). https:\/\/openreview.net\/forum?id=bVuP3ltATMz"},{"key":"12_CR35","doi-asserted-by":"crossref","unstructured":"Li, Z., Zhang, J., Liu, L., Liu, J.: Auditing privacy defenses in federated learning via generative gradient leakage. In: The IEEE\/CVF Computer Vision and Pattern Recognition Conference (CVPR) (2022)","DOI":"10.1109\/CVPR52688.2022.00989"},{"key":"12_CR36","unstructured":"Marathe, V.J., Kanani, P.: Subject granular differential privacy in federated learning (2022)"},{"key":"12_CR37","doi-asserted-by":"crossref","unstructured":"Mathew, M., Bagal, V., Tito, R., Karatzas, D., Valveny, E., Jawahar, C.: Infographicvqa. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1697\u20131706 (2022)","DOI":"10.1109\/WACV51458.2022.00264"},{"key":"12_CR38","doi-asserted-by":"crossref","unstructured":"Mathew, M., Karatzas, D., Jawahar, C.: DocVQA: a dataset for VQA on document images. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2200\u20132209 (2021)","DOI":"10.1109\/WACV48630.2021.00225"},{"key":"12_CR39","unstructured":"Mathew, M., Tito, R., Karatzas, D., Manmatha, R., Jawahar, C.: Document visual question answering challenge 2020. arXiv preprint arXiv:2008.08899 (2020)"},{"key":"12_CR40","unstructured":"McMahan, B., Moore, E., Ramage, D., Hampson, S., y\u00a0Arcas, B.A.: Communication-efficient learning of deep networks from decentralized data. In: Singh, A., Zhu, X.J. (eds.) Proceedings of the 20th International Conference on Artificial Intelligence and Statistics, AISTATS 2017, 20\u201322 April 2017, Fort Lauderdale, FL, USA. Proceedings of Machine Learning Research, vol.\u00a054, pp. 1273\u20131282. PMLR (2017). http:\/\/proceedings.mlr.press\/v54\/mcmahan17a.html"},{"key":"12_CR41","unstructured":"Mehta, H., Thakurta, A.G., Kurakin, A., Cutkosky, A.: Towards large scale transfer learning for differentially private image classification. Trans. Mach. Learn. Res. 2023 (2023). https:\/\/openreview.net\/forum?id=Uu8WwCFpQv"},{"key":"12_CR42","doi-asserted-by":"crossref","unstructured":"Melis, L., Song, C., De\u00a0Cristofaro, E., Shmatikov, V.: Exploiting unintended feature leakage in collaborative learning. In: 2019 IEEE Symposium on Security and Privacy (SP), pp. 691\u2013706. IEEE (2019)","DOI":"10.1109\/SP.2019.00029"},{"key":"12_CR43","doi-asserted-by":"crossref","unstructured":"Nasr, M., Shokri, R., Houmansadr, A.: Comprehensive privacy analysis of deep learning: Passive and active white-box inference attacks against centralized and federated learning. In: 2019 IEEE Symposium on Security and Privacy (SP), pp. 739\u2013753. IEEE (2019)","DOI":"10.1109\/SP.2019.00065"},{"key":"12_CR44","doi-asserted-by":"publisher","unstructured":"OpenAI: GPT-4 technical report. CoRR abs\/2303.08774 (2023). https:\/\/doi.org\/10.48550\/ARXIV.2303.08774. https:\/\/doi.org\/10.48550\/arXiv.2303.08774","DOI":"10.48550\/ARXIV.2303.08774"},{"key":"12_CR45","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"732","DOI":"10.1007\/978-3-030-86331-9_47","volume-title":"Document Analysis and Recognition \u2013 ICDAR 2021","author":"R Powalski","year":"2021","unstructured":"Powalski, R., Borchmann, \u0141, Jurkiewicz, D., Dwojak, T., Pietruszka, M., Pa\u0142ka, G.: Going full-TILT boogie on document understanding with text-image-layout transformer. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12822, pp. 732\u2013747. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86331-9_47"},{"key":"12_CR46","doi-asserted-by":"crossref","unstructured":"Qi, L., et al.: DuReadervis: a Chinese dataset for open-domain document visual question answering. In: Findings of the Association for Computational Linguistics: ACL 2022, pp. 1338\u20131351 (2022)","DOI":"10.18653\/v1\/2022.findings-acl.105"},{"issue":"140","key":"12_CR47","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"12_CR48","unstructured":"Rajkumar, A., Agarwal, S.: A differentially private stochastic gradient descent algorithm for multiparty classification. In: Lawrence, N.D., Girolami, M.A. (eds.) Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics, AISTATS 2012, La Palma, Canary Islands, Spain, 21\u201323 April 2012. JMLR Proceedings, vol.\u00a022, pp. 933\u2013941. JMLR.org (2012). http:\/\/proceedings.mlr.press\/v22\/rajkumar12.html"},{"key":"12_CR49","doi-asserted-by":"crossref","unstructured":"Shokri, R., Shmatikov, V.: Privacy-preserving deep learning. In: Proceedings of the 22nd ACM SIGSAC Conference on Computer and Communications Security, pp. 1310\u20131321 (2015)","DOI":"10.1145\/2810103.2813687"},{"key":"12_CR50","doi-asserted-by":"crossref","unstructured":"Shokri, R., Stronati, M., Song, C., Shmatikov, V.: Membership inference attacks against machine learning models. In: 2017 IEEE Symposium on Security and Privacy (SP), pp. 3\u201318. IEEE (2017)","DOI":"10.1109\/SP.2017.41"},{"key":"12_CR51","doi-asserted-by":"crossref","unstructured":"\u0160imsa, \u0160., et al.: Docile benchmark for document information localization and extraction. arXiv preprint arXiv:2302.05658 (2023)","DOI":"10.1007\/978-3-031-41679-8_9"},{"key":"12_CR52","doi-asserted-by":"publisher","unstructured":"Song, S., Chaudhuri, K., Sarwate, A.D.: Stochastic gradient descent with differentially private updates. In: IEEE Global Conference on Signal and Information Processing, GlobalSIP 2013, Austin, TX, USA, 3\u20135 December 2013, pp. 245\u2013248. IEEE (2013). https:\/\/doi.org\/10.1109\/GlobalSIP.2013.6736861","DOI":"10.1109\/GlobalSIP.2013.6736861"},{"key":"12_CR53","unstructured":"Suri, A., Kanani, P., Marathe, V.J., Peterson, D.W.: Subject membership inference attacks in federated learning. arXiv preprint arXiv:2206.03317 (2022)"},{"key":"12_CR54","doi-asserted-by":"crossref","unstructured":"Tanaka, R., Nishida, K., Yoshida, S.: Visualmrc: machine reading comprehension on document images. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 13878\u201313888 (2021)","DOI":"10.1609\/aaai.v35i15.17635"},{"key":"12_CR55","unstructured":"Tirumala, K., Markosyan, A., Zettlemoyer, L., Aghajanyan, A.: Memorization without overfitting: analyzing the training dynamics of large language models. In: Advances in Neural Information Processing Systems, vol. 35, pp. 38274\u201338290 (2022)"},{"key":"12_CR56","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"778","DOI":"10.1007\/978-3-030-86331-9_50","volume-title":"Document Analysis and Recognition \u2013 ICDAR 2021","author":"R Tito","year":"2021","unstructured":"Tito, R., Karatzas, D., Valveny, E.: Document collection visual question answering. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12822, pp. 778\u2013792. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86331-9_50"},{"key":"12_CR57","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109834","volume":"144","author":"R Tito","year":"2023","unstructured":"Tito, R., Karatzas, D., Valveny, E.: Hierarchical multimodal transformers for multipage DocVQA. Pattern Recogn. 144, 109834 (2023)","journal-title":"Pattern Recogn."},{"key":"12_CR58","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"635","DOI":"10.1007\/978-3-030-86337-1_42","volume-title":"Document Analysis and Recognition \u2013 ICDAR 2021","author":"R Tito","year":"2021","unstructured":"Tito, R., Mathew, M., Jawahar, C.V., Valveny, E., Karatzas, D.: ICDAR 2021 competition on document visual question answering. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12824, pp. 635\u2013649. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86337-1_42"},{"key":"12_CR59","unstructured":"Tobaben, M., Pradhan, G., He, Y., J\u00e4lk\u00f6, J., Honkela, A.: Understanding practical membership privacy of deep learning. CoRR abs\/2402.06674 (2024). https:\/\/doi.org\/10.48550\/arXiv.2402.06674"},{"key":"12_CR60","unstructured":"Tobaben, M., et al.: On the efficacy of differentially private few-shot image classification. Trans. Mach. Learn. Res. (2023). https:\/\/openreview.net\/forum?id=hFsr59Imzm"},{"key":"12_CR61","doi-asserted-by":"crossref","unstructured":"Van\u00a0Landeghem, J., et al.: Document understanding dataset and evaluation (dude). In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 19528\u201319540 (2023)","DOI":"10.1109\/ICCV51070.2023.01789"},{"key":"12_CR62","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"420","DOI":"10.1007\/978-3-031-41679-8_24","volume-title":"ICDAR 2023","author":"J Van Landeghem","year":"2023","unstructured":"Van Landeghem, J., et al.: ICDAR 2023 competition on document understanding of everything (DUDE). In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) ICDAR 2023. LNCS, vol. 14188, pp. 420\u2013434. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41679-8_24"},{"issue":"2","key":"12_CR63","doi-asserted-by":"publisher","first-page":"227","DOI":"10.2478\/popets-2022-0043","volume":"2022","author":"A Wainakh","year":"2022","unstructured":"Wainakh, A., et al.: User-level label leakage from gradients in federated learning. Proc. Priv. Enhancing Technol. 2022(2), 227\u2013244 (2022)","journal-title":"Proc. Priv. Enhancing Technol."},{"key":"12_CR64","doi-asserted-by":"publisher","unstructured":"Wang, W., Li, Y., Ou, Y., Zhang, Y.: Layout and task aware instruction prompt for zero-shot document image question answering. CoRR abs\/2306.00526 (2023). https:\/\/doi.org\/10.48550\/ARXIV.2306.00526","DOI":"10.48550\/ARXIV.2306.00526"},{"key":"12_CR65","unstructured":"Web: Industry Documents Library. https:\/\/www.industrydocuments.ucsf.edu\/. Accessed 20 Oct 2022"},{"key":"12_CR66","unstructured":"Web: Public Inspection Files. https:\/\/publicfiles.fcc.gov\/. Accessed 20 Oct 2022"},{"key":"12_CR67","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: Layoutlmv2: multi-modal pre-training for visually-rich document understanding. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 2579\u20132591 (2021)","DOI":"10.18653\/v1\/2021.acl-long.201"},{"key":"12_CR68","doi-asserted-by":"publisher","unstructured":"Ye, J., et al.: mPLUG-DocOwl: modularized multimodal large language model for document understanding. CoRR abs\/2307.02499 (2023). https:\/\/doi.org\/10.48550\/ARXIV.2307.02499","DOI":"10.48550\/ARXIV.2307.02499"},{"key":"12_CR69","unstructured":"Yosinski, J., Clune, J., Bengio, Y., Lipson, H.: How transferable are features in deep neural networks? In: Ghahramani, Z., Welling, M., Cortes, C., Lawrence, N.D., Weinberger, K.Q. (eds.) Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, 8\u201313 December 2014, Montreal, Quebec, Canada, pp. 3320\u20133328 (2014). https:\/\/proceedings.neurips.cc\/paper\/2014\/hash\/375c71349b295fbe2dcdca9206f20a06-Abstract.html"},{"key":"12_CR70","unstructured":"Yu, D., et al.: Differentially private fine-tuning of language models. In: The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, 25\u201329 April 2022. OpenReview.net (2022). https:\/\/openreview.net\/forum?id=Q42f0dfjECO"},{"key":"12_CR71","unstructured":"Zhao, B., Mopuri, K.R., Bilen, H.: IDLG: improved deep leakage from gradients. arXiv preprint arXiv:2001.02610 (2020)"},{"key":"12_CR72","unstructured":"Zhu, L., Liu, Z., Han, S.: Deep leakage from gradients. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70552-6_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,15]],"date-time":"2025-06-15T14:07:49Z","timestamp":1749996469000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70552-6_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031705519","9783031705526"],"references-count":72,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70552-6_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"11 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2024.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}