{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T18:27:06Z","timestamp":1763922426154,"version":"3.45.0"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032093677","type":"print"},{"value":"9783032093684","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T00:00:00Z","timestamp":1763942400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T00:00:00Z","timestamp":1763942400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-09368-4_10","type":"book-chapter","created":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T18:14:15Z","timestamp":1763921655000},"page":"158-174","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Non-linear Audio-Visual Storytelling from\u00a0Scanned Comics: A Character-Centric Approach"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-6196-4984","authenticated-orcid":false,"given":"Ansh","family":"Kushwaha","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7272-6124","authenticated-orcid":false,"given":"Sandeep","family":"Khanna","sequence":"additional","affiliation":[]},{"given":"Lenin","family":"Khangjrakpam","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3431-0483","authenticated-orcid":false,"given":"Chiranjoy","family":"Chattopadhyay","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0282-3372","authenticated-orcid":false,"given":"Gaurav","family":"Bhatnagar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,24]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Baek, Y., Lee, B., Han, D., Yun, S., Lee, H.: Character region awareness for text detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9365\u20139374 (2019)","DOI":"10.1109\/CVPR.2019.00959"},{"key":"10_CR2","doi-asserted-by":"publisher","unstructured":"Canny, J.: A computational approach to edge detection. IEEE Trans. Pattern Anal. Mach. Intell. PAMI-8(6), 679\u2013698 (1986). https:\/\/doi.org\/10.1109\/TPAMI.1986.4767851","DOI":"10.1109\/TPAMI.1986.4767851"},{"key":"10_CR3","doi-asserted-by":"publisher","unstructured":"Cao, Y., Xufang, P., Chan, A., Lau, R.: DynamicManga: animating still manga via camera movement. IEEE Trans. Multimedia 1\u20131 (2016). https:\/\/doi.org\/10.1109\/TMM.2016.2609415","DOI":"10.1109\/TMM.2016.2609415"},{"key":"10_CR4","doi-asserted-by":"publisher","unstructured":"C. Correia, J.M., P. Gomes, A.J.: Balloon extraction from complex comic books using edge detection and histogram scoring. Multimedia Tools Appl. 75(18), 11367\u201311390 (2015). https:\/\/doi.org\/10.1007\/s11042-015-2858-0","DOI":"10.1007\/s11042-015-2858-0"},{"key":"10_CR5","unstructured":"Du, Y., et\u00a0al.: PP-OCR: A practical ultra lightweight OCR system. arXiv preprint arXiv:2009.09941 (2020)"},{"key":"10_CR6","doi-asserted-by":"crossref","unstructured":"Dubray, D., Laubrock, J.: Deep CNN-based speech balloon detection and segmentation for comic books. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1237\u20131243 (2019). https:\/\/api.semanticscholar.org\/CorpusID:67787670","DOI":"10.1109\/ICDAR.2019.00200"},{"key":"10_CR7","unstructured":"Durette, P.N., Contributors: gTTS: Google text-to-speech. https:\/\/gtts.readthedocs.io\/. Accessed 2025"},{"key":"10_CR8","doi-asserted-by":"publisher","unstructured":"Dutta, A., Biswas, S., Das, A.K.: CNN-based segmentation of speech balloons and narrative text boxes from comic book page images. Int. J. Doc. Anal. Recogn. (IJDAR) (18), 49\u201362 (2021). https:\/\/doi.org\/10.1007\/s10032-021-00366-4","DOI":"10.1007\/s10032-021-00366-4"},{"key":"10_CR9","doi-asserted-by":"publisher","unstructured":"Gupta, V., Detani, V., Khokar, V., Chattopadhyay, C.: C2VNet: a deep learning framework towards comic strip to audio-visual scene synthesis. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12822, pp. 160\u2013175. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86331-9_11","DOI":"10.1007\/978-3-030-86331-9_11"},{"key":"10_CR10","unstructured":"Halford, M.: Comic book panel segmentation (2022). https:\/\/maxhalford.github.io\/blog\/comic-book-panel-segmentation\/"},{"key":"10_CR11","doi-asserted-by":"publisher","unstructured":"Ho, A.K.N., Burie, J.C., Ogier, J.M.: Panel and speech balloon extraction from comic books. In: 2012 10th IAPR International Workshop on Document Analysis Systems, pp. 424\u2013428 (2012). https:\/\/doi.org\/10.1109\/DAS.2012.66","DOI":"10.1109\/DAS.2012.66"},{"key":"10_CR12","doi-asserted-by":"publisher","unstructured":"Iwata, M., Ito, A., Kise, K.: A study to achieve manga character retrieval method for manga images. In: 2014 11th IAPR International Workshop on Document Analysis Systems, pp. 309\u2013313 (2014). https:\/\/doi.org\/10.1109\/DAS.2014.60","DOI":"10.1109\/DAS.2014.60"},{"key":"10_CR13","doi-asserted-by":"crossref","unstructured":"Iyyer, M., et al.: The amazing mysteries of the gutter: drawing inferences between panels in comic book narratives. In: IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.686"},{"key":"10_CR14","unstructured":"Iyyer, M., et al.: The amazing mysteries of the gutter: Drawing inferences between panels in comic book narratives. CoRR abs\/1611.05118 (2016). http:\/\/arxiv.org\/abs\/1611.05118"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Li, Y., Aizawa, K., Matsui, Y.: Manga109Dialog: A large-scale dialogue dataset for comics speaker detection (2024). https:\/\/arxiv.org\/abs\/2306.17469","DOI":"10.1109\/ICME57554.2024.10687709"},{"key":"10_CR16","doi-asserted-by":"publisher","unstructured":"Li, Y., Hinami, R., Aizawa, K., Matsui, Y.: Zero-shot character identification and speaker prediction in comics via iterative multimodal fusion. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 7366\u20137374. MM \u201924, Association for Computing Machinery, New York, NY, USA (2024). https:\/\/doi.org\/10.1145\/3664647.3681659","DOI":"10.1145\/3664647.3681659"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Liu, S., et\u00a0al.: Grounding DINO: marrying DINO with grounded pre-training for open-set object detection. In: European Conference on Computer Vision, pp. 38\u201355. Springer (2024)","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"10_CR18","doi-asserted-by":"publisher","unstructured":"Nguyen, N.V., Rigaud, C., Burie, J.C.: Comic MTL: optimized multi-task learning for comic book image analysis. Int. J. Doc. Anal. Recognit. 22(3), 265\u2013284 (2019). https:\/\/doi.org\/10.1007\/s10032-019-00330-3","DOI":"10.1007\/s10032-019-00330-3"},{"key":"10_CR19","unstructured":"Ponsard, C.: Enhancing the accessibility for all of digital comic books. e-Minds 1 (2009)"},{"key":"10_CR20","unstructured":"Rigaud, C., Burie, J.C., et\u00a0al.: What do we expect from comic panel extraction? In: 2019 International Conference on Document Analysis and Recognition Workshops (ICDARW). vol.\u00a01, pp. 44\u201349. IEEE (2019)"},{"key":"10_CR21","doi-asserted-by":"publisher","unstructured":"Rigaud, C., Gu\u00e9rin, C., Karatzas, D., Burie, J.-C., Ogier, J.-M.: Knowledge-driven understanding of images in comic books. Int. J. Doc. Anal. Recogn. (IJDAR) 18(3), 199\u2013221 (2015). https:\/\/doi.org\/10.1007\/s10032-015-0243-1","DOI":"10.1007\/s10032-015-0243-1"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Rigaud, C., Karatzas, D., Van\u00a0de Weijer, J., Burie, J.C., Ogier, J.M.: An active contour model for speech balloon detection in comics. In: Proceedings of the 12th International Conference on Document Analysis and Recognition (ICDAR), pp. 1240\u20131244 (2013)","DOI":"10.1109\/ICDAR.2013.251"},{"key":"10_CR23","doi-asserted-by":"publisher","unstructured":"Rigaud, C., et al.: Speech balloon and speaker association for comics and manga understanding. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR), pp. 351\u2013355 (2015). https:\/\/doi.org\/10.1109\/ICDAR.2015.7333782","DOI":"10.1109\/ICDAR.2015.7333782"},{"key":"10_CR24","doi-asserted-by":"crossref","unstructured":"Sachdeva, R., Shin, G., Zisserman, A.: Tails tell tales: chapter-wide manga transcriptions with character names. In: Proceedings of the Asian Conference on Computer Vision, pp. 2053\u20132069 (2024)","DOI":"10.1007\/978-981-96-0908-6_4"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Soykan, G., Yuret, D., Sezgin, T.M.: Spatially augmented speech bubble to character association via comic multi-task learning. In: International Conference on Document Analysis and Recognition, pp. 231\u2013256. Springer (2024)","DOI":"10.1007\/978-3-031-70645-5_15"},{"key":"10_CR26","unstructured":"Soykan, G., Yuret, D., Sezgin, T.M.: Identity-aware semi-supervised learning for comic character re-identification (2023). https:\/\/arxiv.org\/abs\/2308.09096"},{"key":"10_CR27","unstructured":"Zhao, X., et al.: Fast segment anything (2023)"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2025 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-09368-4_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T18:14:20Z","timestamp":1763921660000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-09368-4_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,24]]},"ISBN":["9783032093677","9783032093684"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-09368-4_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,24]]},"assertion":[{"value":"24 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wuhan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iapr.org\/icdar2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}