{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T18:15:44Z","timestamp":1730312144616,"version":"3.28.0"},"reference-count":57,"publisher":"SPIE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,4,3]]},"DOI":"10.1117\/12.2651572","type":"proceedings-article","created":{"date-parts":[[2023,4,5]],"date-time":"2023-04-05T22:04:36Z","timestamp":1680732276000},"page":"53","source":"Crossref","is-referenced-by-count":0,"title":["Analyzing components of a transformer under different dataset scales in 3D prostate CT segmentation"],"prefix":"10.1117","author":[{"given":"Yicong","family":"Tan","sequence":"first","affiliation":[]},{"given":"Prerak","family":"Mody","sequence":"additional","affiliation":[]},{"given":"Viktor","family":"van der Valk","sequence":"additional","affiliation":[]},{"given":"Marius","family":"Staring","sequence":"additional","affiliation":[]},{"given":"Jan","family":"van Gemert","sequence":"additional","affiliation":[]}],"member":"189","reference":[{"key":"c1","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"c2","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv preprint arXiv:1810.04805"},{"key":"c3","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"c4","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv preprint arXiv:2010.11929"},{"key":"c5","article-title":"Beit: Bert pre-training of image transformers","author":"Bao","year":"2021","journal-title":"arXiv preprint arXiv:2106.08254"},{"key":"c6","first-page":"12009","article-title":"Swin transformer v2: Scaling up capacity and resolution","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Liu","year":"2022"},{"key":"c7","article-title":"Vision transformer adapter for dense predictions","author":"Chen","year":"2022","journal-title":"arXiv preprint arXiv:2205.08534"},{"key":"c8","first-page":"3965","article-title":"Coatnet: Marrying convolution and attention for all data sizes","volume":"34","author":"Dai","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"c9","first-page":"22","article-title":"Cvt: Introducing convolutions to vision transformers","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"Wu","year":"2021"},{"key":"c10","article-title":"Dino: Detr with improved denoising anchor boxes for end-to-end object detection","author":"Zhang","year":"2022","journal-title":"arXiv preprint arXiv:2203.03605"},{"key":"c11","article-title":"Axial attention in multidimensional transformers","author":"Ho","year":"2019","journal-title":"arXiv preprint arXiv:1912.12180"},{"key":"c12","article-title":"Shuffle transformer: Rethinking spatial shuffle for vision transformer","author":"Huang","year":"2021","journal-title":"arXiv preprint arXiv:2106.03650"},{"key":"c13","article-title":"Regionvit: Regional-to-local attention for vision transformers","author":"Chen","year":"2021","journal-title":"arXiv preprint arXiv:2106.02689"},{"doi-asserted-by":"publisher","key":"c14","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"c15","first-page":"28522","article-title":"Vitae: Vision transformer advanced by exploring intrinsic inductive bias","volume":"34","author":"Xu","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"c16","article-title":"Transformer in convolutional neural networks","author":"Liu","year":"2021","journal-title":"arXiv preprint arXiv:2106.03180"},{"key":"c17","article-title":"Conditional positional encodings for vision transformers","author":"Chu","year":"2021","journal-title":"arXiv preprint arXiv:2102.10882"},{"doi-asserted-by":"publisher","key":"c18","DOI":"10.1038\/s41592-020-01008-z"},{"key":"c19","article-title":"nnformer: Interleaved transformer for volumetric segmentation","author":"Zhou","year":"2021","journal-title":"arXiv preprint arXiv:2109.03201"},{"doi-asserted-by":"publisher","key":"c20","DOI":"10.1109\/WACV51458.2022.00181"},{"key":"c21","article-title":"Automatic segmentation of head and neck tumor: How powerful transformers are?","author":"Sobirov","year":"2022","journal-title":"[Medical Imaging with Deep Learning"},{"key":"c22","first-page":"14","article-title":"Transfuse: Fusing transformers and cnns for medical image segmentation","volume-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"Zhang","year":"2021"},{"key":"c23","article-title":"Is it time to replace cnns with transformers for medical images?","author":"Matsoukas","year":"2021","journal-title":"arXiv preprint arXiv:2108.09038"},{"key":"c24","first-page":"1","article-title":"A convnet for the 2020s","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Liu","year":"2022"},{"key":"c25","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","volume-title":"International Conference on Medical image computing and computer-assisted intervention","author":"Ronneberger","year":"2015"},{"key":"c26","first-page":"424","article-title":"3d u-net: learning dense volumetric segmentation from sparse annotation","volume-title":"International conference on medical image computing and computer-assisted intervention","author":"\u00c7i\u00e7ek","year":"2016"},{"key":"c27","first-page":"770","article-title":"Deep residual learning for image recognition","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"He","year":"2016"},{"key":"c28","first-page":"4700","article-title":"Densely connected convolutional networks","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Huang","year":"2017"},{"key":"c29","first-page":"327","article-title":"Weighted res-unet for high-quality retina vessel segmentation","volume-title":"2018 9th international conference on information technology in medicine and education (ITME)","author":"Xiao","year":"2018"},{"key":"c30","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1109\/ISM46123.2019.00049","article-title":"Resunet++: An advanced architecture for medical image segmentation","volume-title":"2019 IEEE International Symposium on Multimedia (ISM)","author":"Jha","year":"2019"},{"doi-asserted-by":"publisher","key":"c31","DOI":"10.1016\/j.neunet.2019.08.025"},{"doi-asserted-by":"publisher","key":"c32","DOI":"10.1109\/JBHI.6221020"},{"doi-asserted-by":"publisher","key":"c33","DOI":"10.1007\/978-3-030-00889-5"},{"doi-asserted-by":"publisher","key":"c34","DOI":"10.1109\/TMI.42"},{"key":"c35","first-page":"1","article-title":"Attention u-net: Learning where to look for the pancreas","author":"Oktay","year":"2018","journal-title":"arXiv preprint arXiv:1804.03999"},{"key":"c36","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1109\/ICIP40778.2020.9190761","article-title":"Attention unet++: A nested attention- aware u-net for liver ct image segmentation","volume-title":"2020 IEEE International Conference on Image Processing (ICIP)","author":"Li","year":"2020"},{"key":"c37","first-page":"205","article-title":"Ma-unet: An improved version of unet based on multi-scale and attention mechanism for medical image segmentation","volume-title":"Third International Conference on Electronics and Communication; Network and Computer Technology (ECNCT 2021)","volume":"12167","author":"Cai","year":"2022"},{"doi-asserted-by":"publisher","key":"c38","DOI":"10.3389\/fbioe.2020.00670"},{"key":"c39","first-page":"1","article-title":"Attention-augmented u-net (aa-u-net) for semantic segmentation","author":"Rajamani","year":"2022","journal-title":"Signal, image and video processing"},{"key":"c40","first-page":"61","article-title":"Utnet: a hybrid transformer architecture for medical image segmentation","volume-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"Gao","year":"2021"},{"key":"c41","first-page":"1","article-title":"Transunet: Transformers make strong encoders for medical image segmentation","author":"Chen","year":"2021","journal-title":"arXiv preprint arXiv:2102.04306"},{"key":"c42","first-page":"326","article-title":"Multi-compound transformer for accurate biomedical image segmentation","volume-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"Ji","year":"2021"},{"key":"c43","first-page":"1","article-title":"Transclaw u-net: Claw u-net with transformers for medical image segmentation","author":"Chang","year":"2021","journal-title":"arXiv preprint arXiv:2107.05188"},{"key":"c44","first-page":"1","article-title":"Swin-unet: Unet-like pure transformer for medical image segmentation","author":"Cao","year":"2021","journal-title":"arXiv preprint arXiv:2105.05537"},{"key":"c45","first-page":"12259","article-title":"Levit: a vision transformer in convnet\u2019s clothing for faster inference","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision","author":"Graham","year":"2021"},{"key":"c46","first-page":"1","article-title":"Levit-unet: Make faster encoders with transformer for medical image segmentation","author":"Xu","year":"2021","journal-title":"arXiv preprint arXiv:2107.08623"},{"key":"c47","first-page":"36","article-title":"Medical transformer: Gated axial- attention for medical image segmentation","volume-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"Valanarasu","year":"2021"},{"key":"c48","first-page":"99","article-title":"A multi-branch hybrid transformer network for corneal endothelial cell segmentation","volume-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"Zhang","year":"2021"},{"key":"c49","first-page":"109","article-title":"Transbts: Multimodal brain tumor segmentation using transformer","volume-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"Wang","year":"2021"},{"key":"c50","first-page":"272","article-title":"Swin unetr: Swin transformers for semantic segmentation of brain tumors in mri images","volume-title":"[International MICCAI Brainlesion Workshop","author":"Hatamizadeh","year":"2022"},{"key":"c51","first-page":"1","article-title":"D-former: A u-shaped dilated transformer for 3d medical image segmentation","author":"Wu","year":"2022","journal-title":"arXiv preprint arXiv:2201.00462"},{"key":"c52","first-page":"2","article-title":"Metaformer is actually what you need for vision","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Yu","year":"2022"},{"key":"c53","first-page":"24261","article-title":"Mlp-mixer: An all-mlp architecture for vision","volume":"34","author":"Tolstikhin","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"c54","first-page":"2","article-title":"Do you even need attention? a stack of feed-forward layers does surprisingly well on imagenet","author":"Melas-Kyriazi","year":"2021","journal-title":"arXiv preprint arXiv:2105.02723"},{"key":"c55","first-page":"2","article-title":"Patches are all you need?","author":"Trockman","year":"2022","journal-title":"arXiv preprint arXiv:2201.09792"},{"doi-asserted-by":"publisher","key":"c56","DOI":"10.1117\/1.JMI.5.4.044501"},{"doi-asserted-by":"publisher","key":"c57","DOI":"10.1016\/j.media.2013.12.002"}],"event":{"name":"Image Processing","start":{"date-parts":[[2023,2,19]]},"location":"San Diego, United States","end":{"date-parts":[[2023,2,24]]}},"container-title":["Medical Imaging 2023: Image Processing"],"original-title":[],"deposited":{"date-parts":[[2023,4,24]],"date-time":"2023-04-24T19:03:08Z","timestamp":1682362988000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.spiedigitallibrary.org\/conference-proceedings-of-spie\/12464\/2651572\/Analyzing-components-of-a-transformer-under-different-dataset-scales-in\/10.1117\/12.2651572.full"}},"subtitle":[],"editor":[{"given":"Ivana","family":"I\u0161gum","sequence":"additional","affiliation":[]},{"given":"Olivier","family":"Colliot","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2023,4,3]]},"references-count":57,"URL":"https:\/\/doi.org\/10.1117\/12.2651572","relation":{},"subject":[],"published":{"date-parts":[[2023,4,3]]}}}