{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:49:03Z","timestamp":1767340143523,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"r\u00e9gion Grand Est France"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3689094.3689471","type":"proceedings-article","created":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T18:25:52Z","timestamp":1728411952000},"page":"50-56","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Historical Postcards Retrieval through Vision Foundation Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6818-026X","authenticated-orcid":false,"given":"Anis","family":"Amri","sequence":"first","affiliation":[{"name":"Universit\u00e9 de Lorraine, IDMC CNRS LORIA, Nancy, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0024-1280","authenticated-orcid":false,"given":"Salvatore","family":"Tabbone","sequence":"additional","affiliation":[{"name":"Universit\u00e9 de Lorraine, IDMC CNRS LORIA, Nancy, France"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","unstructured":"OpenAI Josh Achiam Steven Adler and Sandhini Agarwal et al. 2023. GPT-4 Technical Report. https:\/\/doi.org\/10.48550\/arXiv.2303.08774.","DOI":"10.48550\/arXiv.2303.08774"},{"volume-title":"International Conference on Pattern Recognition.","author":"Bartz Christian","key":"e_1_3_2_1_2_1","unstructured":"Christian Bartz, Hendrik Raetz, and Jona Otholt et al. 2022. Synthesis in style: Semantic segmentation of historical documents using synthetic data. In International Conference on Pattern Recognition."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","unstructured":"Rishi Bommasani Drew A. Hudson and Ehsan Adeli et al. 2021. On the opportunities and risks of foundation models. https:\/\/doi.org\/10.48550\/arXiv.2108.07258.","DOI":"10.48550\/arXiv.2108.07258"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","unstructured":"Tom B. Brown Benjamin Mann and Nick Ryder et al. 2020. Language Models are Few-Shot Learners. https:\/\/doi.org\/10.48550\/arXiv.2005.14165.","DOI":"10.48550\/arXiv.2005.14165"},{"key":"e_1_3_2_1_5_1","volume-title":"Sketching out the details: Sketch-based image retrieval using convolutional neural networks with multi-stage regression. Computers & Graphics 71","author":"Bui Tu","year":"2018","unstructured":"Tu Bui, Leonardo Ribeiro, Moacir Ponti, and John Collomosse. 2018. Sketching out the details: Sketch-based image retrieval using convolutional neural networks with multi-stage regression. Computers & Graphics 71 (2018)."},{"volume-title":"Emerging Properties in Self-Supervised Vision Transformers. In IEEE International Conference on Computer Vision.","author":"Caron Mathilde","key":"e_1_3_2_1_6_1","unstructured":"Mathilde Caron, Hugo Touvron, and Ishan Misra et al. 2021. Emerging Properties in Self-Supervised Vision Transformers. In IEEE International Conference on Computer Vision."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","unstructured":"Aakanksha Chowdhery Sharan Narang and Jacob Devlin et al. 2023. PaLM: Scaling Language Modeling with Pathways. https:\/\/doi.org\/10.48550\/arXiv.2204.02311.","DOI":"10.48550\/arXiv.2204.02311"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"e_1_3_2_1_10_1","article-title":"Deep learning for instance retrieval: A survey","volume":"45","author":"Yu Wei","year":"2022","unstructured":"Wei Chen; Yu Liu;WeipingWang et al. 2022. Deep learning for instance retrieval: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence 45, 6 (2022).","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICFHR.2014.85"},{"volume-title":"Deep layout extraction applied to historical postcards","author":"Garc\u00eda Bruno","key":"e_1_3_2_1_12_1","unstructured":"Bruno Garc\u00eda, Bel\u00e9n Moreno, Jos\u00e9 F. V\u00e9lez, and \u00c1ngel S\u00e1nchez. 2022. Deep layout extraction applied to historical postcards. Springer."},{"key":"e_1_3_2_1_13_1","volume-title":"European Conference on Computer Vision.","author":"Goodwin Walter","year":"2022","unstructured":"Walter Goodwin, Sagar Vaze, Ioannis Havoutis, and Ingmar Posner. 2022. Zeroshot category-level object pose estimation. In European Conference on Computer Vision."},{"volume-title":"German Conference on Pattern Recognition.","author":"Grzeszick Rene","key":"e_1_3_2_1_14_1","unstructured":"Rene Grzeszick and Gernot A. Fink. 2014. Recognizing scene categories of historical postcards. In German Conference on Pattern Recognition."},{"key":"e_1_3_2_1_15_1","volume-title":"Mask R-CNN. In IEEE International Conference on Computer Vision.","author":"He Kaiming","year":"2017","unstructured":"Kaiming He, Georgia Gkioxari, Piotr Doll\u00e1r, and Ross Girshick. 2017. Mask R-CNN. In IEEE International Conference on Computer Vision."},{"key":"e_1_3_2_1_16_1","unstructured":"Glenn Jocher Alex Stoken and Ji\u00ed Borovec. 2021. ultralytics\/yolov5: v3.0."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_18_1","volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning.","author":"Junnan Li","year":"2022","unstructured":"Li Junnan, Li Dongxu, Xiong Caiming, and Hoi Steven. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Alexander Kirillov Eric Mintun and Nikhila Ravi et al. 2023. Segment anything. https:\/\/doi.org\/10.48550\/arXiv.2304.02643.","DOI":"10.48550\/arXiv.2304.02643"},{"key":"e_1_3_2_1_21_1","volume-title":"Hinton","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E. Hinton. 2012. Imagenet classification with deep convolutional neural networks. Commun. ACM 60, 6 (2012)."},{"volume-title":"Annual Meeting of the Association for Computational Linguistics.","author":"Lewis Mike","key":"e_1_3_2_1_22_1","unstructured":"Mike Lewis, Yinhan Liu, and Naman Goyal et al. 2020. BART: Denoising Sequenceto- Sequence Pre-training for Natural Language Generation, Translation, and Comprehension. In Annual Meeting of the Association for Computational Linguistics."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-004-0138-z"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","unstructured":"Yinhan Liu Myle Ott and Naman Goyal et al. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. https:\/\/doi.org\/10.48550\/arXiv.1907.11692.","DOI":"10.48550\/arXiv.1907.11692"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_1_26_1","volume-title":"Historical Document Image Segmentation Combining Deep Learning and Gabor Features. In International Conference on Document Analysis and Recogntion.","author":"Mehri Maroua","year":"2023","unstructured":"Maroua Mehri, Akrem Sellami, and Salvatore Tabbone. 2023. Historical Document Image Segmentation Combining Deep Learning and Gabor Features. In International Conference on Document Analysis and Recogntion."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00818"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Jamshed Memon Maira Sami and Rizwan Ahmed Khan et al. 2020. Handwritten optical character recognition (ocr): A comprehensive systematic literature review (SLR). IEEE ACCESS 8 (2020).","DOI":"10.1109\/ACCESS.2020.3012542"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR56361.2022.9956617"},{"volume-title":"CNOS: A Strong Baseline for CAD-based Novel Object Segmentation. In International Conference on Computer Vision Workshops.","author":"Nguyen Van Nguyen","key":"e_1_3_2_1_30_1","unstructured":"Van Nguyen Nguyen, Thibault Groueix, and Georgy Ponimatkin et al. 2023. CNOS: A Strong Baseline for CAD-based Novel Object Segmentation. In International Conference on Computer Vision Workshops."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","unstructured":"Maxime Oquab Timoth\u00e9e Darcet and Th\u00e9o Moutakanni et al. 2023. DINOv2: Learning Robust Visual Features without Supervision. https:\/\/doi.org\/10.48550\/arXiv.2304.07193.","DOI":"10.48550\/arXiv.2304.07193"},{"volume-title":"Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning.","author":"Radford Alec","key":"e_1_3_2_1_32_1","unstructured":"Alec Radford, Jong W. Kim, and Chris Hallacy et al. 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.acalib.2023.102736"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.7763\/IJMLC.2012.V2.137"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/946247.946751"},{"volume-title":"Workshop on Computational Humanities Research.","author":"Smits Thomas","key":"e_1_3_2_1_36_1","unstructured":"Thomas Smits, Wouter Haverals, and Loren Verreyen et al. 2023. Greetings from! Extracting address information from 100, 000 historical picture postcards. In Workshop on Computational Humanities Research."},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the Eleventh International Conference on Language Resources and Evaluation.","author":"Sugisaki Kyoko","year":"2018","unstructured":"Kyoko Sugisaki, Nicolas Wiedmer, and Heiko Hausendorf. 2018. Building a corpus from handwritten picture postcards: Transcription, annotation and partof- speech tagging. In Proceedings of the Eleventh International Conference on Language Resources and Evaluation."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","unstructured":"Hugo Touvron Thibaut Lavril and Gautier Izacard et al. 2023. LLaMA: Open and Efficient Foundation Language Models. https:\/\/doi.org\/10.48550\/arXiv.2302.13971.","DOI":"10.48550\/arXiv.2302.13971"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2307.09288"},{"volume-title":"IEEE Conference on Computer Vision and Pattern Recognition.","author":"Wang Yangtao","key":"e_1_3_2_1_40_1","unstructured":"Yangtao Wang, Xi Shen, and Shell Hu et al. 2022. Self-supervised transformers for unsupervised object discovery using normalized cut. In IEEE Conference on Computer Vision and Pattern Recognition."},{"volume-title":"International Conference on Pattern Recognition.","author":"Wolf Fabian","key":"e_1_3_2_1_41_1","unstructured":"Fabian Wolf and Gernot A. Fink. 2022. Self-training of handwritten word recognition for synthetic-to-real adaptation. In International Conference on Pattern Recognition."},{"volume-title":"IEEE International conference on computer vision.","author":"Artem","key":"e_1_3_2_1_42_1","unstructured":"Artem B. Yandex and Victor Lempitsky. 2015. Aggregating local deep features for image retrieval. In IEEE International conference on computer vision."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2013.05.037"},{"volume-title":"European conference on computer vision.","author":"Zhang Jingyi","key":"e_1_3_2_1_44_1","unstructured":"Jingyi Zhang, Fumin Shen, Li Liu, and Mengyang Yu et al. Fan Zhu. 2018. Generative domain-migration hashing for sketch-to-image retrieval. In European conference on computer vision."},{"key":"e_1_3_2_1_45_1","volume-title":"SIFT meets CNN: A decade survey of instance retrieval","author":"Zheng Liang","year":"2015","unstructured":"Liang Zheng, Yi Yang, and Qi Tian. 2015. SIFT meets CNN: A decade survey of instance retrieval. IEEE transactions on pattern analysis and machine intelligence 14, 8 (2015)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","unstructured":"Xueyan Zou Jianwei Yang and Hao Zhang et al. 2023. Segment everything everywhere all at once. https:\/\/doi.org\/10.48550\/arXiv.2304.06718.","DOI":"10.48550\/arXiv.2304.06718"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 6th workshop on the analySis, Understanding and proMotion of heritAge Contents"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689094.3689471","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3689094.3689471","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:37:46Z","timestamp":1755974266000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689094.3689471"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":46,"alternative-id":["10.1145\/3689094.3689471","10.1145\/3689094"],"URL":"https:\/\/doi.org\/10.1145\/3689094.3689471","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}