{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T07:45:00Z","timestamp":1777016700338,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","funder":[{"name":"Department of Biotechnology, Ministry of Science and Technology, India","award":["BT\/PR40123\/BTIS\/137\/47\/2022"],"award-info":[{"award-number":["BT\/PR40123\/BTIS\/137\/47\/2022"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,17]]},"DOI":"10.1145\/3799830.3799866","type":"proceedings-article","created":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T06:45:08Z","timestamp":1777013108000},"page":"402-407","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A fast method for clustering regulatory DNA regions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7458-690X","authenticated-orcid":false,"given":"Akash","family":"Dileep K","sequence":"first","affiliation":[{"name":"Indian Institute of Science Education and Research Pune, Pune, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6820-1878","authenticated-orcid":false,"given":"Leelavati","family":"Narlikar","sequence":"additional","affiliation":[{"name":"Departments of Data Science and of Biology, Indian Institute of Science Education and Research Pune, Pune, India"}]}],"member":"320","published-online":{"date-parts":[[2026,4,23]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","unstructured":"S.\u00a0F. Altschul W. Gish W. Miller E.\u00a0W. Myers and D.\u00a0J. Lipman. 1990. Basic local alignment search tool. Journal of Molecular Biology 215 3 (1990) 403\u2013410. 10.1016\/S0022-2836(05)80360-2","DOI":"10.1016\/S0022-2836(05)80360-2"},{"key":"e_1_3_3_1_3_2","unstructured":"T. Bailey and C. Elkan. 1994. Fitting a mixture model by expectation maximization to discover motifs in biopolymers. AAAI Press 28\u201336."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Anushua Biswas and Leelavati Narlikar. 2021. A universal framework for detecting cis-regulatory diversity in DNA regulatory regions. Genome Research 31 9 (2021) 1646\u20131662.","DOI":"10.1101\/gr.274563.120"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","unstructured":"Dirk Eddelbuettel and Romain Francois. 2011. Rcpp: Seamless R and C++ Integration. Journal of Statistical Software 40 8 (2011) 1\u201318. 10.18637\/jss.v040.i08","DOI":"10.18637\/jss.v040.i08"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Robert\u00a0C Edgar. 2004. MUSCLE: a multiple sequence alignment method with reduced time and space complexity. BMC bioinformatics 5 1 (2004) 113.","DOI":"10.1186\/1471-2105-5-113"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Fiorella\u00a0C Grandi Hailey Modi Lucas Kampman and M\u00a0Ryan Corces. 2022. Chromatin accessibility profiling by ATAC-seq. Nature protocols 17 6 (2022) 1518\u20131552.","DOI":"10.1038\/s41596-022-00692-9"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"M. Haeussler A.\u00a0S. Zweig C. Tyner M.\u00a0L. Speir K.\u00a0R. Rosenbloom B.\u00a0J. Raney C.\u00a0M. Lee B.\u00a0T. Lee A.\u00a0S. Hinrichs J.\u00a0N. G\u00a0onzalez D. Gibson M. Diekhans H. Clawson J. Casper G.\u00a0P. Barber D. Haussler R.\u00a0M. Kuhn and W.\u00a0J. Kent. 2019. The UCSC Genome Browser database: 2019 update. Nucleic Acids Research 47 D1 (2019) D853\u2013D858.","DOI":"10.1093\/nar\/gky1095"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","unstructured":"Michael Hahsler Kurt Hornik and Christian Buchta. 2008. Getting Things in Order: An Introduction to the R Package seriation. Journal of Statistical Software 25 3 (2008) 1\u201334. 10.18637\/jss.v025.i03","DOI":"10.18637\/jss.v025.i03"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","unstructured":"A. Hasegawa R. Shimizu H. Kurokawa and M. Yamamoto. 2012. DNA Binding Diversity Achieved Through the Interaction of GATA1 N\u2011Finger and GATA Motif Is Important for Embryonic Erythropoiesis. Blood 120 21 (2012) 3441. 10.1182\/blood.V120.21.3441.3441","DOI":"10.1182\/blood.V120.21.3441.3441"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"S. Heinz C. Benner N. Spann E. Bertolino Y.\u00a0C. Lin P. Laslo J.\u00a0X. Cheng C. Murre H. Singh and C.\u00a0K. Glass. 2010. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Molecular Cell 38 4 (May 2010) 576\u2013589.","DOI":"10.1016\/j.molcel.2010.05.004"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"J. Hu B. Li and D. Kihara. 2005. Limitations and potentials of current motif discovery algorithms. Nucleic Acids Research 33 15 (2005) 4899\u20134913.","DOI":"10.1093\/nar\/gki791"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","unstructured":"Lars\u00a0Juhl Jensen and Steen Knudsen. 2000. Automatic discovery of regulatory patterns in promoter regions based on whole cell expression data and functional annotation. Bioinformatics 16 4 (2000) 326\u2013333. 10.1093\/bioinformatics\/16.4.326","DOI":"10.1093\/bioinformatics\/16.4.326"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"D.\u00a0S. Johnson A. Mortazavi R.\u00a0M. Myers and B. Wold. 2007. Genome-wide mapping of in vivo protein-DNA interactions. Science 316 5830 (2007) 1497\u20131502.","DOI":"10.1126\/science.1141319"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"A. Khan O. Fornes A. Stigliani M. Gheorghe J.\u00a0A. Castro-Mondragon R. van\u00a0der Lee A. Bessy J. Ch?neby S.\u00a0R. Kulkarni G. Tan D. Baranasic D.\u00a0J. Arenillas A. Sandelin K. Vandepoele B. Lenhard B. Ballester W.\u00a0W. Wasserman F. Parcy and A. Mathel\u00a0ier. 2018. JASPAR 2018: update of the open-access database of transcription factor binding profiles and its web framework. Nucleic Acids Research 46 D1 (01 2018) D260\u2013D266.","DOI":"10.1093\/nar\/gkx1126"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"L.\u00a0J. Ko and J.\u00a0D. Engel. 1993. DNA-binding specificities of the GATA transcription factor family. Molecular and Cellular Biology 13 7 (1993) 4011\u20134022.","DOI":"10.1128\/mcb.13.7.4011-4022.1993"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"P. Langfelder B. Zhang and S. Horvath. 2008. Defining clusters from a hierarchical cluster tree: the Dynamic Tree Cut package for R. Bioinformatics 24 5 (2008) 719\u2013720.","DOI":"10.1093\/bioinformatics\/btm563"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"W. Li L. Shang K. Huang J. Li Z. Wang and H. Yao. 2017. Identification of critical base pairs required for CTCF binding in motif M1 and M2. Protein Cell 8 7 (2017) 544\u2013549.","DOI":"10.1007\/s13238-017-0387-5"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"S. Mitra and L. Narlikar. 2016. No Promoter Left Behind (NPLB): learn de novo promoter architectures from genome-wide transcription start sites. Bioinformatics 32 5 (2016) 779\u2013781.","DOI":"10.1093\/bioinformatics\/btv645"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"L. Narlikar. 2013. MuMod: A Bayesian approach to detect multiple modes of protein-DNA binding from genome-wide ChIP data. Nucleic Acids Research 41 (2013) 21\u201332.","DOI":"10.1093\/nar\/gks950"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"L. Narlikar. 2014. Multiple novel promoter-architectures revealed by decoding the hidden heterogeneity within the genome. Nucleic Acids Research 42 20 (Nov 2014) 12388\u201312403.","DOI":"10.1093\/nar\/gku924"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"T. Ni D.\u00a0L. Corcoran E.\u00a0A. Rach S. Song E.\u00a0P. Spana Y. Gao U. Ohler and J. Zhu. 2010. A paired-end sequencing strategy to map the complex landscape of transcription initiation. Nature Methods 7 7 (Jul 2010) 521\u2013527.","DOI":"10.1038\/nmeth.1464"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"S. Nikumbh and B. Lenhard. 2023. Identifying promoter sequence architectures via a chunking-based algorithm using non-negative matrix factorisation. PLoS Computational Biology 19 11 (2023) e1011491.","DOI":"10.1371\/journal.pcbi.1011491"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"R.\u00a0R. Puig P. Boddie A. Khan J.\u00a0A. Castro-Mondragon and A. Mathelier. 2021. UniBind: maps of high-confidence direct TF-DNA interactions across nine species. BMC Genomics 22 1 (2021) 482.","DOI":"10.1186\/s12864-021-07760-6"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Ho\u00a0Sung Rhee and B\u00a0Franklin Pugh. 2012. ChIP-exo method for identifying genomic location of DNA-binding proteins with near-single-nucleotide accuracy. Current protocols in molecular biology 100 1 (2012) 21\u201324.","DOI":"10.1002\/0471142727.mb2124s100"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"P.\u00a0J. Rousseeuw. 1987. Silhouettes: A graphical aid to the interpretation and validation of cluster analysis. J. Comput. Appl. Math. 20 (1987) 53\u201365.","DOI":"10.1016\/0377-0427(87)90125-7"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"T. Shiraki S. Kondo S. Katayama K. Waki T. Kasukawa H. Kawaji R. Kodzius A. Watahiki M. Nakamura T. Arakawa S. Fukuda D. Sasaki A. Podhajska M. Harbers J. Kawai P. Carninci and Y. Hayashizaki. 2003. Cap analysis gene expression for high-throughput analysis of transcriptional starting point and identification of promoter usage. Proceedings of the National Academy of Sciences 100 26 (2003) 15776\u201315781.","DOI":"10.1073\/pnas.2136655100"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.5555\/645635.660991"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"H. Tanaka Y. Takizawa M. Takaku D. Kato Y. Kumagawa S.\u00a0A. Grimm P.\u00a0A. Wade and H. Kurumizaka. 2020. Interaction of the pioneer transcription factor GATA3 with nucleosomes. Nature Communications 11 1 (2020) 4136.","DOI":"10.1038\/s41467-020-17959-y"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","unstructured":"Jacques van Helden Boris Andr\u00e9 and Julio Collado-Vides. 2000. Discovering regulatory elements in non-coding sequences by analysis of spaced dyads. Nucleic Acids Research 28 8 (2000) 1808\u20131818. 10.1093\/nar\/28.8.1808","DOI":"10.1093\/nar\/28.8.1808"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Isobel\u00a0A. Wadman Hirotaka Osada Gerald\u00a0G. Gr\u00fctz Alan\u00a0D. Agulnick Heiner Westphal Alan Forster and Terence\u00a0H. Rabbitts. 1997. The LIM\u2010only protein Lmo2 is a bridging molecule assembling an erythroid DNA\u2010binding complex which includes the TAL1 E47 GATA\u20101 and Ldb1\/NLI proteins. The EMBO Journal 16 11 (1997) 3145\u20133157.","DOI":"10.1093\/emboj\/16.11.3145"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"J. Yang J.\u00a0R. Horton X. Zhang X. Wang V.\u00a0G. Corces and X. Cheng. 2023. Structures of CTCF\u2013DNA complexes including all 11 zinc fingers. Nucleic Acids Research 51 16 (2023) 8447\u20138462.","DOI":"10.1093\/nar\/gkad594"}],"event":{"name":"CODS 2025: 13th ACM IKDD International Conference on Data Science","location":"Pune India","acronym":"CODS 2025"},"container-title":["Proceedings of the 13th ACM IKDD International Conference on Data Science"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3799830.3799866","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T07:14:20Z","timestamp":1777014860000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3799830.3799866"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,17]]},"references-count":31,"alternative-id":["10.1145\/3799830.3799866","10.1145\/3799830"],"URL":"https:\/\/doi.org\/10.1145\/3799830.3799866","relation":{},"subject":[],"published":{"date-parts":[[2025,12,17]]},"assertion":[{"value":"2026-04-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}