{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:14:06Z","timestamp":1750220046741,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,2,17]],"date-time":"2023-02-17T00:00:00Z","timestamp":1676592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,2,17]]},"DOI":"10.1145\/3587716.3587778","type":"proceedings-article","created":{"date-parts":[[2023,9,7]],"date-time":"2023-09-07T23:27:30Z","timestamp":1694129250000},"page":"379-385","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Research on Fine-grained Classification of Scene Images Fused with Multimodality"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8971-5085","authenticated-orcid":false,"given":"Yao","family":"Wen","sequence":"first","affiliation":[{"name":"School of Information Science and Technology, Tibet University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6226-2231","authenticated-orcid":false,"given":"Peng","family":"Nan","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Tibet University, China and \rCollaborative Innovation Center for Tibet informatization by MOE and Tibet Autonomous Region, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1984-6770","authenticated-orcid":false,"given":"Nuo","family":"Qun","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Tibet University, China and \rCollaborative Innovation Center for Tibet informatization by MOE and Tibet Autonomous Region, China"}]}],"member":"320","published-online":{"date-parts":[[2023,9,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Places: A 10 million image database for scene recognition[J]","author":"Zhou B","year":"2017","unstructured":"Zhou B, Lapedriza A, Khosla A, Places: A 10 million image database for scene recognition[J]. IEEE transactions on pattern analysis and machine intelligence, 2017, 40(6): 1452-1464."},{"key":"e_1_3_2_1_2_1","volume-title":"Deep learning for fine-grained image analysis:A survey[J]. arXiv preprint arXiv:1907.03069","author":"Wei X S","year":"2019","unstructured":"Wei X S, Wu J, Cui Q. Deep learning for fine-grained image analysis:A survey[J]. arXiv preprint arXiv:1907.03069, 2019."},{"volume-title":"2020 Second International Conference on Inventive Research in Computing Applications (ICIRCA). IEEE,2020","author":"Mittal","key":"e_1_3_2_1_3_1","unstructured":"Mittal, Rishabh, and Anchal Garg. \u201dText extraction using OCR: a systematic review.\u201d 2020 Second International Conference on Inventive Research in Computing Applications (ICIRCA). IEEE,2020."},{"key":"e_1_3_2_1_4_1","volume-title":"Multimodal machine learning: A survey and taxonomy[J]","author":"Baltrusaitis T","year":"2018","unstructured":"Baltrusaitis T, Ahuja C, Morency L P. Multimodal machine learning: A survey and taxonomy[J]. IEEE transactions on pattern analysis and Machine intelligence, 2018, 41(2): 423-443."},{"key":"e_1_3_2_1_5_1","first-page":"12113","volume":"2020","author":"Yu D","unstructured":"Yu D, Li X, Zhang C, Towards accurate scene text recognition with semantic reasoning networks[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020: 12113-12122.","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_6_1","first-page":"1735","volume":"1997","author":"Hochreiter S","unstructured":"Hochreiter S,Schmidhuber J.Long Short-Term Memory[J].Neural Computation,1997,9(8):1735-1780.","journal-title":"Neural Computation"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Kim Y. Convolutional Neural Networks for Sentence Classification[J].arXiv preprint arXiv:1408.5882 2014.","DOI":"10.3115\/v1\/D14-1181"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Kalchbrenner N Grefenstette E Blunsom P. A convolutional neural network for modelling sentences[J]. arXiv preprint arXiv:1404.2188 2014.","DOI":"10.3115\/v1\/P14-1062"},{"key":"e_1_3_2_1_9_1","unstructured":"Bahdanau D Cho K Bengio Y. Neural machine translation by jointly learning to align and translate[J].arXiv preprint arXiv:1409.0473 2014."},{"key":"e_1_3_2_1_10_1","unstructured":"Vaswani A Shazeer N Parmar N Attention is all you need[C]\/\/Advances in Neural Information Processing Systems(NIPS) 2017: 5998-6008"},{"key":"e_1_3_2_1_11_1","first-page":"757","volume":"2013","author":"Karaoglu S","unstructured":"Karaoglu S, van Gemert J C, Gevers T. Con-text: text detection using background connectivity for fine-grained object classification[C]\/\/Proceedings of the 21st ACM international conference on Multimedia. 2013: 757-760.","journal-title":"Multimedia."},{"key":"e_1_3_2_1_12_1","first-page":"4023","volume":"2021","author":"Mafla A","unstructured":"Mafla A, Dey S, Biten A F, Multi-modal reasoning graph for scenetext based fine-grained image classification and retrieval[C]\/\/Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 2021: 4023-4033.","journal-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision."},{"key":"e_1_3_2_1_13_1","first-page":"4092","volume":"2019","author":"Yang Y","unstructured":"Yang Y, Wang K T, Zhan D C, Comprehensive Semi-Supervised Multi-Modal Learning[C]\/\/IJCAI. 2019: 4092-4098.","journal-title":"IJCAI."},{"key":"e_1_3_2_1_14_1","unstructured":"Gan C Schwartz J Alter S Threedworld: A platform for interactive multi-modal physical simulation[J]. arXiv preprint arXiv:2007.04954 2020."},{"key":"e_1_3_2_1_15_1","volume-title":"Efficient large-scale multi-modal classification[C]\/\/Proceedings of the AAAI Conference on Artificial Intelligence","author":"Kiela D","year":"2018","unstructured":"Kiela D, Grave E, Joulin A, Efficient large-scale multi-modal classification[C]\/\/Proceedings of the AAAI Conference on Artificial Intelligence. 2018, 32(1)."},{"key":"e_1_3_2_1_16_1","first-page":"12695","volume":"2020","author":"Wang W","unstructured":"Wang W, Tran D, Feiszli M. What makes training multi-modal classification networks hard?[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020: 12695-12705.","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.7763\/IJCTE.2018.V10.1198"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.7763\/IJCTE.2020.V12.1275"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.7763\/IJCTE.2013.V5.722"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.7763\/IJCTE.2011.V3.287"}],"event":{"name":"ICMLC 2023: 2023 15th International Conference on Machine Learning and Computing","acronym":"ICMLC 2023","location":"Zhuhai China"},"container-title":["Proceedings of the 2023 15th International Conference on Machine Learning and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3587716.3587778","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3587716.3587778","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:08:00Z","timestamp":1750183680000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3587716.3587778"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,17]]},"references-count":20,"alternative-id":["10.1145\/3587716.3587778","10.1145\/3587716"],"URL":"https:\/\/doi.org\/10.1145\/3587716.3587778","relation":{},"subject":[],"published":{"date-parts":[[2023,2,17]]},"assertion":[{"value":"2023-09-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}