{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T05:05:45Z","timestamp":1765343145370,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","funder":[{"name":"China National Science Foundation","award":["62372393"],"award-info":[{"award-number":["62372393"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755600","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:27:39Z","timestamp":1761377259000},"page":"8636-8644","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["GalaxAlign: Mimicking Citizen Scientists' Multimodal Guidance for Galaxy Morphology Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-3513-1945","authenticated-orcid":false,"given":"Ruoqi","family":"Wang","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8394-6410","authenticated-orcid":false,"given":"Haitao","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-Sen University, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2861-9492","authenticated-orcid":false,"given":"Qiong","family":"Luo","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China and The Hong Kong University of Science and Technology, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19821-2_26"},{"key":"e_1_3_2_2_2_1","first-page":"317","volume-title":"American Astronomical Society Meeting Abstracts# 228","volume":"228","author":"Blum Robert D","year":"2016","unstructured":"Robert D Blum, Kaylan Burleigh, Arjun Dey, David J Schlegel, Aaron M Meisner, Michael Levi, Adam D Myers, Dustin Lang, John Moustakas, Anna Patej, et al., 2016. The decam legacy survey. In American Astronomical Society Meeting Abstracts# 228, Vol. 228. 317-01."},{"key":"e_1_3_2_2_3_1","volume-title":"NeurIPS 2022 Machine Learning and the Physical Sciences Workshop.","author":"Bowles Micah","year":"2022","unstructured":"Micah Bowles, Hongming Tang, Eleni Vardoulaki, Emma L Alexander, Yan Luo, Lawrence Rudnick, Mike Walmsley, Fiona Porter, Anna MM Scaife, Inigo Val Slijepcevic, et al., 2022. A New Task: Deriving Semantic Class Targets for the Physical Sciences. In NeurIPS 2022 Machine Learning and the Physical Sciences Workshop."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1093\/mnras\/stad1021"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.3847\/1538-3881\/ab089d"},{"key":"e_1_3_2_2_7_1","volume-title":"Rotation-invariant convolutional neural networks for galaxy morphology prediction. Monthly notices of the royal astronomical society","author":"Dieleman Sander","year":"2015","unstructured":"Sander Dieleman, Kyle W Willett, and Joni Dambre. 2015. Rotation-invariant convolutional neural networks for galaxy morphology prediction. Monthly notices of the royal astronomical society, Vol. 450, 2 (2015), 1441-1459."},{"key":"e_1_3_2_2_8_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_2_9_1","first-page":"27092","article-title":"Datacomp: In search of the next generation of multimodal datasets","volume":"36","author":"Gadre Samir Yitzhak","year":"2023","unstructured":"Samir Yitzhak Gadre, Gabriel Ilharco, Alex Fang, Jonathan Hayase, Georgios Smyrnis, Thao Nguyen, Ryan Marten, Mitchell Wortsman, Dhruba Ghosh, Jieyu Zhang, et al., 2023. Datacomp: In search of the next generation of multimodal datasets. Advances in Neural Information Processing Systems, Vol. 36 (2023), 27092-27112.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"volume-title":"Galaxy10 DECals Dataset. https:\/\/github.com\/henrysky\/Galaxy10","author":"Henry Leung","key":"e_1_3_2_2_12_1","unstructured":"Leung Henry. 2021. Galaxy10 DECals Dataset. https:\/\/github.com\/henrysky\/Galaxy10."},{"key":"e_1_3_2_2_13_1","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, Weizhu Chen, et al., 2022. Lora: Low-rank adaptation of large language models. ICLR, Vol. 1, 2 (2022), 3.","journal-title":"ICLR"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","unstructured":"Gabriel Ilharco Mitchell Wortsman Ross Wightman Cade Gordon Nicholas Carlini Rohan Taori Achal Dave Vaishaal Shankar Hongseok Namkoong John Miller Hannaneh Hajishirzi Ali Farhadi and Ludwig Schmidt. 2021. OpenCLIP. doi:10.5281\/zenodo.5143773 If you use this software please cite it as below..","DOI":"10.5281\/zenodo.5143773"},{"key":"e_1_3_2_2_15_1","volume-title":"Umaima Rahman, Mohsen Guizani, and Fakhri Karray.","author":"Imam Raza","year":"2024","unstructured":"Raza Imam, Mohammed Talha Alam, Umaima Rahman, Mohsen Guizani, and Fakhri Karray. 2024. CosmoCLIP: Generalizing Large Vision-Language Models for Astronomical Imaging. arXiv preprint arXiv:2407.07315 (2024)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1093\/mnras\/stad2852"},{"key":"e_1_3_2_2_17_1","volume-title":"NeurIPS 2023 AI for Science Workshop.","author":"Lanusse Francois","year":"2023","unstructured":"Francois Lanusse, Liam Holden Parker, Siavash Golkar, Alberto Bietti, Miles Cranmer, Michael Eickenberg, Geraud Krawezik, Michael McCabe, Ruben Ohana, Mariel Pettee, et al., 2023. AstroCLIP: cross-modal pre-training for astronomical foundation models. In NeurIPS 2023 AI for Science Workshop."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1365-2966.2010.17432.x"},{"key":"e_1_3_2_2_19_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023b. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2023), 34892-34916."},{"key":"e_1_3_2_2_20_1","unstructured":"Shengchao Liu Yanjing Li Zhuoxinran Li Anthony Gitter Yutao Zhu Jiarui Lu Zhao Xu Weili Nie Arvind Ramanathan Chaowei Xiao et al. 2023a. A text-guided protein design framework. arXiv preprint arXiv:2302.04611 (2023)."},{"key":"e_1_3_2_2_21_1","volume-title":"PAPERCLIP: Associating Astronomical Observations and Natural Language with Multi-Modal Models. arXiv preprint arXiv:2403.08851","author":"Mishra-Sharma Siddharth","year":"2024","unstructured":"Siddharth Mishra-Sharma, Yiding Song, and Jesse Thaler. 2024. PAPERCLIP: Associating Astronomical Observations and Natural Language with Multi-Modal Models. arXiv preprint arXiv:2403.08851 (2024)."},{"key":"e_1_3_2_2_22_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et al. 2023. Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)."},{"key":"e_1_3_2_2_23_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"crossref","unstructured":"Olga Russakovsky Jia Deng Hao Su Jonathan Krause Sanjeev Satheesh Sean Ma Zhiheng Huang Andrej Karpathy Aditya Khosla Michael Bernstein et al. 2015. Imagenet large scale visual recognition challenge. International journal of computer vision Vol. 115 (2015) 211-252.","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-42328-w"},{"key":"e_1_3_2_2_26_1","unstructured":"Christoph Schuhmann Romain Beaumont Richard Vencu Cade Gordon Ross Wightman Mehdi Cherti Theo Coombes Aarush Katta Clayton Mullis Mitchell Wortsman et al. 2022. Laion-5b: An open large-scale dataset for training next generation image-text models. Advances in neural information processing systems Vol. 35 (2022) 25278-25294."},{"key":"e_1_3_2_2_27_1","first-page":"317","volume-title":"American Astronomical Society Meeting Abstracts# 228","volume":"228","author":"Silva David R","year":"2016","unstructured":"David R Silva, Robert D Blum, Lori Allen, Arjun Dey, David J Schlegel, Dustin Lang, John Moustakas, Aaron M Meisner, Francisco Valdes, Anna Patej, et al., 2016. The Mayall z-band Legacy Survey. In American Astronomical Society Meeting Abstracts# 228, Vol. 228. 317-02."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1093\/rasti\/rzad055"},{"key":"e_1_3_2_2_29_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_2_30_1","volume-title":"Gaurav Kumar Nayak, and Mubarak Shah","author":"Cepeda Vicente Vivanco","year":"2024","unstructured":"Vicente Vivanco Cepeda, Gaurav Kumar Nayak, and Mubarak Shah. 2024. Geoclip: Clip-inspired alignment between locations and images for effective worldwide geo-localization. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_2_31_1","volume-title":"Vision foundation models: can they be applied to astrophysics data? arXiv preprint arXiv:2409.11175","author":"Voloshynovskyy S","year":"2024","unstructured":"S Voloshynovskyy. 2024. Vision foundation models: can they be applied to astrophysics data? arXiv preprint arXiv:2409.11175 (2024)."},{"key":"e_1_3_2_2_32_1","unstructured":"Mike Walmsley. 2022. Galaxy MNIST Dataset. https:\/\/github.com\/mwalmsley\/galaxy_mnist."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.21105\/joss.05312"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1093\/mnras\/stad2919"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1093\/mnras\/stab2093"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1093\/mnras\/stac525"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1093\/mnras\/stz2816"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1093\/mnras\/stt1458"},{"key":"e_1_3_2_2_39_1","volume-title":"Galaxy Zoo Classification. https:\/\/www.zooniverse.org\/projects\/zookeeper\/galaxy-zoo\/classify","author":"Zoo The Galaxy","year":"2024","unstructured":"The Galaxy Zoo. 2024. Galaxy Zoo Classification. https:\/\/www.zooniverse.org\/projects\/zookeeper\/galaxy-zoo\/classify. Nov. 2024."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1088\/1538-3873\/aa65ba"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755600","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T05:02:17Z","timestamp":1765342937000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755600"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":40,"alternative-id":["10.1145\/3746027.3755600","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755600","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}