{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T15:36:11Z","timestamp":1758123371357,"version":"3.40.3"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031705489"},{"type":"electronic","value":"9783031705496"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70549-6_22","type":"book-chapter","created":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T09:02:15Z","timestamp":1725786135000},"page":"368-383","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Latent Diffusion for\u00a0Guided Document Table Generation"],"prefix":"10.1007","author":[{"given":"Syed Jawwad Haider","family":"Hamdani","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3098-2458","authenticated-orcid":false,"given":"Saifullah","family":"Saifullah","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9697-4285","authenticated-orcid":false,"given":"Stefan","family":"Agne","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6100-8255","authenticated-orcid":false,"given":"Andreas","family":"Dengel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4239-6520","authenticated-orcid":false,"given":"Sheraz","family":"Ahmed","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,9]]},"reference":[{"key":"22_CR1","unstructured":"Austin, J., Johnson, D.D., Ho, J., Tarlow, D., van\u00a0den Berg, R.: Structured denoising diffusion models in discrete state-spaces (2023)"},{"key":"22_CR2","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. arXiv preprint arXiv:2005.12872 (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Gao, L., Yi, X., Jiang, Z., Hao, L., Tang, Z.: Icdar2017 competition on page object detection. In: Proceedings of the 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol. 1, pp. 1417\u20131422 (2017). https:\/\/api.semanticscholar.org\/CorpusID:34640499","DOI":"10.1109\/ICDAR.2017.231"},{"key":"22_CR4","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Ghahramani, Z., Welling, M., Cortes, C., Lawrence, N., Weinberger, K. (eds.) Advances in Neural Information Processing Systems, vol.\u00a027. Curran Associates, Inc. (2014). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2014\/file\/5ca3e9b122f61f8f06494c97b1afccf3-Paper.pdf"},{"key":"22_CR5","doi-asserted-by":"publisher","unstructured":"G\u00f6bel, M., Hassan, T., Oro, E., Orsi, G.: ICDAR 2013 table competition. In: 2013 12th International Conference on Document Analysis and Recognition, pp. 1449\u20131453 (2013). https:\/\/doi.org\/10.1109\/ICDAR.2013.292","DOI":"10.1109\/ICDAR.2013.292"},{"key":"22_CR6","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1007\/978-3-031-41676-7_21","volume-title":"ICDAR 2023","author":"L He","year":"2023","unstructured":"He, L., Lu, Y., Corring, J., Florencio, D., Zhang, C.: Diffusion-based document layout generation. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) ICDAR 2023, pp. 361\u2013378. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41676-7_21"},{"key":"22_CR7","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Proceedings of the 34th International Conference on Neural Information Processing Systems (NIPS 2020). Curran Associates Inc., Red Hook (2020)"},{"key":"22_CR8","doi-asserted-by":"crossref","unstructured":"Inoue, N., Kikuchi, K., Simo-Serra, E., Otani, M., Yamaguchi, K.: Layoutdm: discrete diffusion model for controllable layout generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10167\u201310176 (2023)","DOI":"10.1109\/CVPR52729.2023.00980"},{"key":"22_CR9","unstructured":"Karras, T., et al.: Alias-free generative adversarial networks. In: Proceedings of the NeurIPS (2021)"},{"key":"22_CR10","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational Bayes. In: 2nd International Conference on Learning Representations (ICLR 2014), Banff, 14\u201316 April 2014, Conference Track Proceedings (2014)"},{"key":"22_CR11","unstructured":"Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: TableBank: table benchmark for image-based table detection and recognition. In: Calzolari, N., et al. (eds.) Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 1918\u20131925. European Language Resources Association, Marseille (2020). https:\/\/aclanthology.org\/2020.lrec-1.236"},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Lin, W., et al.: Tsrformer: table structure recognition with transformers (2022)","DOI":"10.1145\/3503161.3548038"},{"key":"22_CR13","unstructured":"Luhman, T., Luhman, E.: Diffusion models for handwriting generation (2020)"},{"key":"22_CR14","doi-asserted-by":"publisher","unstructured":"Nassar, A., Livathinos, N., Lysak, M., Staar, P.: Tableformer: table structure understanding with transformers. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4604\u20134613 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.00457","DOI":"10.1109\/CVPR52688.2022.00457"},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Nikolaidou, K., et al.: Wordstylist: styled verbatim handwritten text generation with latent diffusion models (2023)","DOI":"10.1007\/978-3-031-41679-8_22"},{"key":"22_CR16","unstructured":"van\u00a0den Oord, A., Kalchbrenner, N., Vinyals, O., Espeholt, L., Graves, A., Kavukcuoglu, K.: Conditional image generation with PixelCNN decoders (2016)"},{"key":"22_CR17","unstructured":"van\u00a0den Oord, A., Vinyals, O., Kavukcuoglu, K.: Neural discrete representation learning. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems, vol.\u00a030. Curran Associates, Inc. (2017). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/7a98af17e63a0ac09ce2e96d03992fbc-Paper.pdf"},{"key":"22_CR18","doi-asserted-by":"crossref","unstructured":"Peebles, W., Xie, S.: Scalable diffusion models with transformers (2023)","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"22_CR19","unstructured":"Project, T.A.: Shabbypages: a reproducible document denoising and binarization dataset (2023). https:\/\/github.com\/sparkfish\/shabby-pages"},{"key":"22_CR20","unstructured":"Radford, A., Metz, L., Chintala, S.: Unsupervised representation learning with deep convolutional generative adversarial networks (2015). http:\/\/arxiv.org\/abs\/1511.06434, cite arxiv:1511.06434Comment: Under review as a conference paper at ICLR 2016"},{"key":"22_CR21","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents (2022)"},{"key":"22_CR22","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 8821\u20138831. PMLR (2021). https:\/\/proceedings.mlr.press\/v139\/ramesh21a.html"},{"key":"22_CR23","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection. arXiv preprint arXiv:1506.02640 (2015)","DOI":"10.1109\/CVPR.2016.91"},{"key":"22_CR24","unstructured":"Ren, S., He, K., Girshick, R.B., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. arXiv preprint arXiv:1506.01497 (2015)"},{"key":"22_CR25","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"22_CR26","doi-asserted-by":"crossref","unstructured":"Saharia, C., et al.: Palette: image-to-image diffusion models (2022). https:\/\/openreview.net\/forum?id=FPGs276lUeq","DOI":"10.1145\/3528233.3530757"},{"key":"22_CR27","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. arXiv preprint arXiv:2205.11487 (2022). https:\/\/api.semanticscholar.org\/CorpusID:248986576"},{"key":"22_CR28","doi-asserted-by":"publisher","unstructured":"Saifullah, S.S.A., Agne, S., Dengel, A., Ahmed, S.: Are deep models robust against real distortions? A case study on document image classification. In: 2022 26th International Conference on Pattern Recognition (ICPR), pp. 1628\u20131635 (2022). https:\/\/doi.org\/10.1109\/ICPR56361.2022.9956167","DOI":"10.1109\/ICPR56361.2022.9956167"},{"key":"22_CR29","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1007\/978-3-031-41734-4_13","volume-title":"ICDAR 2023","author":"S Saifullah","year":"2023","unstructured":"Saifullah, S., Agne, S., Dengel, A., Ahmed, S.: Coldbin: cold diffusion for document image binarization. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) ICDAR 2023, pp. 207\u2013226. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41734-4_13"},{"key":"22_CR30","unstructured":"Schuhmann, C., et al.: LAION-5b: an open large-scale dataset for training next generation image-text models. In: Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2022). https:\/\/openreview.net\/forum?id=M3Y74vmsMcY"},{"key":"22_CR31","doi-asserted-by":"publisher","unstructured":"Shen, Z., Zhang, R., Dell, M., Lee, B.C.G., Carlson, J., Li, W.: LayoutParser: a unified toolkit for deep learning based document image analysis. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12821, pp. 131\u2013146. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86549-8_9","DOI":"10.1007\/978-3-030-86549-8_9"},{"key":"22_CR32","doi-asserted-by":"crossref","unstructured":"Smock, B., Pesala, R., Abraham, R.: Pubtables-1m: towards comprehensive table extraction from unstructured documents (2021)","DOI":"10.1109\/CVPR52688.2022.00459"},{"key":"22_CR33","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=St1giarCHLP"},{"key":"22_CR34","doi-asserted-by":"publisher","first-page":"438","DOI":"10.1007\/978-3-031-41682-8_27","volume-title":"ICDAR 2023","author":"N Tanveer","year":"2023","unstructured":"Tanveer, N., Ul-Hasan, A., Shafait, F.: Diffusion models for document image generation. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) ICDAR 2023, pp. 438\u2013453. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41682-8_27"},{"key":"22_CR35","doi-asserted-by":"crossref","unstructured":"Thanh-Tung, H., Tran, T.: Catastrophic forgetting and mode collapse in gans. In: 2020 International Joint Conference on Neural Networks (IJCNN), pp. 1\u201310 (2020). https:\/\/api.semanticscholar.org\/CorpusID:221659882","DOI":"10.1109\/IJCNN48605.2020.9207181"},{"key":"22_CR36","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems, vol.\u00a030. Curran Associates, Inc. (2017). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"22_CR37","doi-asserted-by":"crossref","unstructured":"Xiao, B., Simsek, M., Kantarci, B., Alkheir, A.A.: Revisiting table detection datasets for visually rich documents (2023)","DOI":"10.1016\/j.knosys.2023.111080"},{"key":"22_CR38","doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: Docdiff: document enhancement via residual diffusion models (2023)","DOI":"10.1145\/3581783.3611730"},{"key":"22_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"22_CR40","doi-asserted-by":"crossref","unstructured":"Zheng, X., Burdick, D., Popa, L., Zhong, X., Wang, N.X.R.: Global table extractor (GTE): a framework for joint table identification and cell structure recognition using visual context. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 697\u2013706 (2021)","DOI":"10.1109\/WACV48630.2021.00074"},{"key":"22_CR41","doi-asserted-by":"publisher","unstructured":"Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for document layout analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1015\u20131022. IEEE Computer Society, Los Alamitos (2019). https:\/\/doi.org\/10.1109\/ICDAR.2019.00166","DOI":"10.1109\/ICDAR.2019.00166"},{"key":"22_CR42","doi-asserted-by":"crossref","unstructured":"Zhong, X., ShafieiBavani, E., Yepes, A.J.: Image-based table recognition: data, model, and evaluation. arXiv preprint arXiv:1911.10683 (2019)","DOI":"10.1007\/978-3-030-58589-1_34"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70549-6_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T09:08:43Z","timestamp":1725786523000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70549-6_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031705489","9783031705496"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70549-6_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"9 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2024.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}