{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:40:12Z","timestamp":1755884412342,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T00:00:00Z","timestamp":1731542400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,14]]},"DOI":"10.1145\/3677052.3698690","type":"proceedings-article","created":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T06:38:06Z","timestamp":1731566286000},"page":"54-62","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["TADACap: Time-series Adaptive Domain-Aware Captioning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9315-2067","authenticated-orcid":false,"given":"Elizabeth","family":"Fons","sequence":"first","affiliation":[{"name":"J.P. Morgan AI Research, UK"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9223-4546","authenticated-orcid":false,"given":"Rachneet","family":"Kaur","sequence":"additional","affiliation":[{"name":"J.P. Morgan AI Research, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4383-3590","authenticated-orcid":false,"given":"Zhen","family":"Zeng","sequence":"additional","affiliation":[{"name":"J.P. Morgan AI Research, US"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2112-7100","authenticated-orcid":false,"given":"Soham","family":"Palande","sequence":"additional","affiliation":[{"name":"J.P Morgan AI Research, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5148-2033","authenticated-orcid":false,"given":"Tucker","family":"Balch","sequence":"additional","affiliation":[{"name":"JP Morgan, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7650-9880","authenticated-orcid":false,"given":"Svitlana","family":"Vyetrenko","sequence":"additional","affiliation":[{"name":"J. P. Morgan Chase, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5355-1912","authenticated-orcid":false,"given":"Manuela","family":"Veloso","sequence":"additional","affiliation":[{"name":"JP Morgan, US"}]}],"member":"320","published-online":{"date-parts":[[2024,11,14]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2020. Time Series Data of Covid-19. https:\/\/www.kaggle.com\/datasets\/baguspurnama\/covid-confirmed-global. Accessed: 2023-05-15."},{"key":"e_1_3_2_1_2_1","volume-title":"Gpt-4 technical report. arXiv preprint arXiv:2303.08774","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia\u00a0Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00904"},{"key":"e_1_3_2_1_4_1","volume-title":"Spice: Semantic propositional image caption evaluation. In Computer Vision\u2013ECCV 2016: 14th European Conference","author":"Anderson Peter","year":"2016","unstructured":"Peter Anderson, Basura Fernando, Mark Johnson, and Stephen Gould. 2016. Spice: Semantic propositional image caption evaluation. In Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part V 14. Springer, 382\u2013398."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-1607"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00512"},{"key":"e_1_3_2_1_9_1","unstructured":"David Byrd. 2019. Explaining Agent-Based Financial Market Simulation. arxiv:1909.11650\u00a0[cs.MA]"},{"key":"e_1_3_2_1_10_1","volume-title":"International Conference on Machine Learning. PMLR, 716\u2013725","author":"Celis Elisa","year":"2018","unstructured":"Elisa Celis, Vijay Keswani, Damian Straszak, Amit Deshpande, Tarun Kathuria, and Nisheeth Vishnoi. 2018. Fair and diverse DPP-based data summarization. In International Conference on Machine Learning. PMLR, 716\u2013725."},{"key":"e_1_3_2_1_11_1","volume-title":"Figure captioning with reasoning and sequence-level training. arXiv preprint arXiv:1906.02850","author":"Chen Charles","year":"2019","unstructured":"Charles Chen, Ruiyi Zhang, Eunyee Koh, Sungchul Kim, Scott Cohen, Tong Yu, Ryan Rossi, and Razvan Bunescu. 2019. Figure captioning with reasoning and sequence-level training. arXiv preprint arXiv:1906.02850 (2019)."},{"key":"e_1_3_2_1_12_1","volume-title":"Fast greedy map inference for determinantal point process to improve recommendation diversity. Advances in Neural Information Processing Systems 31","author":"Chen Laming","year":"2018","unstructured":"Laming Chen, Guoxin Zhang, and Eric Zhou. 2018. Fast greedy map inference for determinantal point process to improve recommendation diversity. Advances in Neural Information Processing Systems 31 (2018)."},{"key":"e_1_3_2_1_13_1","volume-title":"Piotr Dollar, and C\u00a0Lawrence Zitnick","author":"Chen Xinlei","year":"2015","unstructured":"Xinlei Chen, Hao Fang, Tsung-Yi Lin, Ramakrishna Vedantam, Saurabh Gupta, Piotr Dollar, and C\u00a0Lawrence Zitnick. 2015. Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325 (2015)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383455.3422544"},{"key":"e_1_3_2_1_15_1","volume-title":"A survey on rag meets llms: Towards retrieval-augmented large language models. arXiv preprint arXiv:2405.06211","author":"Ding Yujuan","year":"2024","unstructured":"Yujuan Ding, Wenqi Fan, Liangbo Ning, Shijie Wang, Hengyun Li, Dawei Yin, Tat-Seng Chua, and Qing Li. 2024. A survey on rag meets llms: Towards retrieval-augmented large language models. arXiv preprint arXiv:2405.06211 (2024)."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings, Part XVII 16","author":"Gurari Danna","year":"2020","unstructured":"Danna Gurari, Yinan Zhao, Meng Zhang, and Nilavra Bhattacharya. 2020. Captioning images taken by people who are blind. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XVII 16. Springer, 417\u2013434."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2018.00014"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 17980\u201317989","author":"Hu Xiaowei","year":"2022","unstructured":"Xiaowei Hu, Zhe Gan, Jianfeng Wang, Zhengyuan Yang, Zicheng Liu, Yumao Lu, and Lijuan Wang. 2022. Scaling up vision-language pre-training for image captioning. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 17980\u201317989."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Harsh Jhamtani and Taylor Berg-Kirkpatrick. 2021. Truth-Conditional Captioning of Time Series Data. In EMNLP.","DOI":"10.18653\/v1\/2021.emnlp-main.55"},{"key":"e_1_3_2_1_20_1","volume-title":"Figureqa: An annotated figure dataset for visual reasoning. arXiv preprint arXiv:1710.07300","author":"Kahou Samira\u00a0Ebrahimi","year":"2017","unstructured":"Samira\u00a0Ebrahimi Kahou, Vincent Michalski, Adam Atkinson, Akos Kadar, Adam Trischler, and Yoshua Bengio. 2017. Figureqa: An annotated figure dataset for visual reasoning. arXiv preprint arXiv:1710.07300 (2017)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/2481023"},{"key":"e_1_3_2_1_22_1","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems 33 (2020), 9459\u20139474.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.488"},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Machine Learning. PMLR, 12888\u201312900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International Conference on Machine Learning. PMLR, 12888\u201312900."},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings, Part XXX 16","author":"Li Xiujun","year":"2020","unstructured":"Xiujun Li, Xi Yin, Chunyuan Li, Pengchuan Zhang, Xiaowei Hu, Lei Zhang, Lijuan Wang, Houdong Hu, Li Dong, Furu Wei, 2020. Oscar: Object-semantics aligned pre-training for vision-language tasks. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXX 16. Springer, 121\u2013137."},{"key":"e_1_3_2_1_26_1","unstructured":"Zekun Li Shiyang Li and Xifeng Yan. 2023. Time Series as Images: Vision Transformer for Irregularly Sampled Time Series."},{"key":"e_1_3_2_1_27_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74\u201381. https:\/\/aclanthology.org\/W04-1013"},{"key":"e_1_3_2_1_28_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems 36","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong\u00a0Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems 36 (2024)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450089"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.100"},{"key":"e_1_3_2_1_31_1","volume-title":"I-tuning: Tuning language models with image for caption generation. arXiv preprint arXiv:2202.06574","author":"Luo Ziyang","year":"2022","unstructured":"Ziyang Luo, Yadong Xi, Rongsheng Zhang, and Jing Ma. 2022. I-tuning: Tuning language models with image for caption generation. arXiv preprint arXiv:2202.06574 (2022)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Anita Mahinpei Zona Kostic and Chris Tanner. 2022. LineCap: Line Charts for Data Visualization Captioning Models. arxiv:2207.07243\u00a0[cs.CV]","DOI":"10.1109\/VIS54862.2022.00016"},{"key":"e_1_3_2_1_33_1","volume-title":"Clipcap: Clip prefix for image captioning. arXiv preprint arXiv:2111.09734","author":"Mokady Ron","year":"2021","unstructured":"Ron Mokady, Amir Hertz, and Amit\u00a0H Bermano. 2021. Clipcap: Clip prefix for image captioning. arXiv preprint arXiv:2111.09734 (2021)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1126"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449923"},{"key":"e_1_3_2_1_36_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_1_37_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog 1, 8","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, Ilya Sutskever, 2019. Language models are unsupervised multitask learners. OpenAI blog 1, 8 (2019), 9."},{"key":"e_1_3_2_1_38_1","volume-title":"Retrieval-augmented Image Captioning. arXiv preprint arXiv:2302.08268","author":"Ramos Rita","year":"2023","unstructured":"Rita Ramos, Desmond Elliott, and Bruno Martins. 2023. Retrieval-augmented Image Captioning. arXiv preprint arXiv:2302.08268 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"SmallCap: Lightweight Image Captioning Prompted with Retrieval Augmentation. arXiv preprint arXiv:2209.15323","author":"Ramos Rita","year":"2022","unstructured":"Rita Ramos, Bruno Martins, Desmond Elliott, and Yova Kementchedjhieva. 2022. SmallCap: Lightweight Image Captioning Prompted with Retrieval Augmentation. arXiv preprint arXiv:2209.15323 (2022)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9533978"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3549555.3549585"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2022.10.006"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490354.3494387"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-54906-9_42"},{"key":"e_1_3_2_1_45_1","volume-title":"Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805","author":"Team Gemini","year":"2023","unstructured":"Gemini Team, Rohan Anil, Sebastian Borgeaud, Yonghui Wu, Jean-Baptiste Alayrac, Jiahui Yu, Radu Soricut, Johan Schalkwyk, Andrew\u00a0M Dai, Anja Hauth, 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_1_46_1","volume-title":"Zero-shot image-to-text generation for visual-semantic arithmetic. arXiv preprint arXiv:2111.14447","author":"Tewel Yoad","year":"2021","unstructured":"Yoad Tewel, Yoav Shalev, Idan Schwartz, and Lior Wolf. 2021. Zero-shot image-to-text generation for visual-semantic arithmetic. arXiv preprint arXiv:2111.14447 (2021)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_2_1_48_1","volume-title":"Git: A generative image-to-text transformer for vision and language. arXiv preprint arXiv:2205.14100","author":"Wang Jianfeng","year":"2022","unstructured":"Jianfeng Wang, Zhengyuan Yang, Xiaowei Hu, Linjie Li, Kevin Lin, Zhe Gan, Zicheng Liu, Ce Liu, and Lijuan Wang. 2022. Git: A generative image-to-text transformer for vision and language. arXiv preprint arXiv:2205.14100 (2022)."},{"key":"e_1_3_2_1_49_1","volume-title":"Personalized re-ranking for improving diversity in live recommender systems. arXiv preprint arXiv:2004.06390","author":"Wang Yichao","year":"2020","unstructured":"Yichao Wang, Xiangyu Zhang, Zhirong Liu, Zhenhua Dong, Xinhua Feng, Ruiming Tang, and Xiuqiang He. 2020. Personalized re-ranking for improving diversity in live recommender systems. arXiv preprint arXiv:2004.06390 (2020)."},{"key":"e_1_3_2_1_50_1","volume-title":"Simvlm: Simple visual language model pretraining with weak supervision. arXiv preprint arXiv:2108.10904","author":"Wang Zirui","year":"2021","unstructured":"Zirui Wang, Jiahui Yu, Adams\u00a0Wei Yu, Zihang Dai, Yulia Tsvetkov, and Yuan Cao. 2021. Simvlm: Simple visual language model pretraining with weak supervision. arXiv preprint arXiv:2108.10904 (2021)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3272018"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358105"},{"key":"e_1_3_2_1_53_1","volume-title":"International conference on machine learning. PMLR","author":"Xu Kelvin","year":"2015","unstructured":"Kelvin Xu, Jimmy Ba, Ryan Kiros, Kyunghyun Cho, Aaron Courville, Ruslan Salakhudinov, Rich Zemel, and Yoshua Bengio. 2015. Show, attend and tell: Neural image caption generation with visual attention. In International conference on machine learning. PMLR, 2048\u20132057."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548409"},{"key":"e_1_3_2_1_55_1","volume-title":"Retrieval-augmented multimodal language modeling. arXiv preprint arXiv:2211.12561","author":"Yasunaga Michihiro","year":"2022","unstructured":"Michihiro Yasunaga, Armen Aghajanyan, Weijia Shi, Rich James, Jure Leskovec, Percy Liang, Mike Lewis, Luke Zettlemoyer, and Wen-tau Yih. 2022. Retrieval-augmented multimodal language modeling. arXiv preprint arXiv:2211.12561 (2022)."},{"key":"e_1_3_2_1_56_1","unstructured":"Qinghao Ye Haiyang Xu Guohai Xu Jiabo Ye Ming Yan Yiyang Zhou Junyang Wang Anwen Hu Pengcheng Shi Yaya Shi Chaoya Jiang Chenliang Li Yuanhong Xu Hehong Chen Junfeng Tian Qian Qi Ji Zhang and Fei Huang. 2023. mPLUG-Owl: Modularization Empowers Large Language Models with Multimodality. arxiv:2304.14178\u00a0[cs.CL]"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490354.3494404"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604237.3626905"},{"key":"e_1_3_2_1_59_1","volume-title":"Retrieval-augmented generation for ai-generated content: A survey. arXiv preprint arXiv:2402.19473","author":"Zhao Penghao","year":"2024","unstructured":"Penghao Zhao, Hailin Zhang, Qinhan Yu, Zhengren Wang, Yunteng Geng, Fangcheng Fu, Ling Yang, Wentao Zhang, and Bin Cui. 2024. Retrieval-augmented generation for ai-generated content: A survey. arXiv preprint arXiv:2402.19473 (2024)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.3390\/app10186235"}],"event":{"name":"ICAIF '24: 5th ACM International Conference on AI in Finance","acronym":"ICAIF '24","location":"Brooklyn NY USA"},"container-title":["Proceedings of the 5th ACM International Conference on AI in Finance"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698690","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3677052.3698690","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:11:58Z","timestamp":1755882718000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698690"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,14]]},"references-count":60,"alternative-id":["10.1145\/3677052.3698690","10.1145\/3677052"],"URL":"https:\/\/doi.org\/10.1145\/3677052.3698690","relation":{},"subject":[],"published":{"date-parts":[[2024,11,14]]},"assertion":[{"value":"2024-11-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}