{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T14:47:34Z","timestamp":1776782854380,"version":"3.51.2"},"publisher-location":"New York, New York, USA","reference-count":64,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"DARPA","award":["FA8750-13-2-0019"],"award-info":[{"award-number":["FA8750-13-2-0019"]}]},{"name":"Army Research Office","award":["W911NF-17-1-0412"],"award-info":[{"award-number":["W911NF-17-1-0412"]}]},{"name":"Fujitsu Laboratories of America"},{"name":"the University of Washington WRF\/Cable Professorship"},{"name":"Ohio Supercomputer Center","award":["PAS1197"],"award-info":[{"award-number":["PAS1197"]}]},{"name":"NSF","award":["CNS-1513120"],"award-info":[{"award-number":["CNS-1513120"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1145\/3178876.3186081","type":"proceedings-article","created":{"date-parts":[[2018,4,13]],"date-time":"2018-04-13T15:53:48Z","timestamp":1523634828000},"page":"1693-1703","source":"Crossref","is-referenced-by-count":51,"title":["StaQC"],"prefix":"10.1145","author":[{"given":"Ziyu","family":"Yao","sequence":"first","affiliation":[{"name":"The Ohio State University, Columbus, OH, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel S.","family":"Weld","sequence":"additional","affiliation":[{"name":"University of Washington, Seattle, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei-Peng","family":"Chen","sequence":"additional","affiliation":[{"name":"Fujitsu Laboratories of America, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huan","family":"Sun","sequence":"additional","affiliation":[{"name":"The Ohio State University, Columbus, OH, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","reference":[{"key":"key-10.1145\/3178876.3186081-1","unstructured":"Miltiadis Allamanis, Hao Peng, and Charles Sutton. 2016. A convolutional attention network for extreme summarization of source code ICML. 2091--2100."},{"key":"key-10.1145\/3178876.3186081-2","unstructured":"Miltos Allamanis, Daniel Tarlow, Andrew Gordon, and Yi Wei. 2015. Bimodal modelling of source code and natural language ICML. 2123--2132."},{"key":"key-10.1145\/3178876.3186081-3","unstructured":"Alberto Bacchelli. 2013. Mining Challenge 2013: Stack Overflow. In The 10th Working Conference on Mining Software Repositories. to appear."},{"key":"key-10.1145\/3178876.3186081-4","unstructured":"Dzmitry Bahdanau, Kyunghyun Cho, and Yoshua Bengio. 2014. Neural Machine Translation by Jointly Learning to Align and Translate. CoRR Vol. abs\/1409.0473 (2014). [arxiv]1409.0473 http:\/\/arxiv.org\/abs\/1409.0473"},{"key":"key-10.1145\/3178876.3186081-5","unstructured":"Antonio Valerio Miceli Barone and Rico Sennrich. 2017. A parallel corpus of Python functions and documentation strings for automated code documentation and code generation. arXiv preprint arXiv:1707.02275 (2017)."},{"key":"key-10.1145\/3178876.3186081-6","doi-asserted-by":"crossref","unstructured":"Brock Angus Campbell and Christoph Treude. 2017. NLP2Code: Code Snippet Content Assist via Natural Language Tasks. arXiv preprint arXiv:1701.05648 (2017).","DOI":"10.1109\/ICSME.2017.56"},{"key":"key-10.1145\/3178876.3186081-7","unstructured":"Ohio Supercomputer Center. 1987. Ohio Supercomputer Center. http:\/\/osc.edu\/ark:\/19495\/f5s1ph73. (1987)."},{"key":"key-10.1145\/3178876.3186081-8","unstructured":"Kyunghyun Cho, Bart van Merri&#235;nboer, Calar G&#252;lc cehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014. Learning Phrase Representations using RNN Encoder--Decoder for Statistical Machine Translation. In EMNLP. Association for Computational Linguistics, Doha, Qatar, 1724--1734."},{"key":"key-10.1145\/3178876.3186081-9","doi-asserted-by":"crossref","unstructured":"Jacob Cohen. 1960. A coefficient of agreement for nominal scales. Educational and psychological measurement Vol. 20, 1 (1960), 37--46.","DOI":"10.1177\/001316446002000104"},{"key":"key-10.1145\/3178876.3186081-10","doi-asserted-by":"crossref","unstructured":"Corinna Cortes and Vladimir Vapnik. 1995. Support-vector networks. Machine learning Vol. 20, 3 (1995), 273--297.","DOI":"10.1007\/BF00994018"},{"key":"key-10.1145\/3178876.3186081-11","unstructured":"David R Cox. 1958. The regression analysis of binary sequences. Journal of the Royal Statistical Society. Series B (Methodological) (1958), 215--242."},{"key":"key-10.1145\/3178876.3186081-12","unstructured":"Lucas BL de Souza, Eduardo C Campos, and Marcelo de A Maia. 2014. Ranking crowd knowledge to assist software development Proceedings of the 22nd International Conference on Program Comprehension. ACM, 72--82."},{"key":"key-10.1145\/3178876.3186081-13","doi-asserted-by":"crossref","unstructured":"Fernanda Madeiral Delfim, Kl&#233;risson VR Paix ao, Damien Cassou, and Marcelo de Almeida Maia. 2016. Redocumenting APIs with crowd knowledge: a coverage analysis based on question types. Journal of the Brazilian Computer Society Vol. 22, 1 (2016), 9.","DOI":"10.1186\/s13173-016-0049-0"},{"key":"key-10.1145\/3178876.3186081-14","unstructured":"Maarten Duijn, Adam Kuvcera, and Alberto Bacchelli. 2015. Quality questions need quality code: classifying code fragments on stack overflow Proceedings of the 12th Working Conference on Mining Software Repositories. IEEE Press, 410--413."},{"key":"key-10.1145\/3178876.3186081-15","unstructured":"Alessandra Giordani and Alessandro Moschitti. 2009. Semantic mapping between natural language questions and SQL queries via syntactic pairing. In International Conference on Application of Natural Language to Information Systems. Springer, 207--221."},{"key":"key-10.1145\/3178876.3186081-16","unstructured":"Alessandra Giordani and Alessandro Moschitti. 2010. Corpora for Automatically Learning to Map Natural Language Questions into SQL Queries. LREC."},{"key":"key-10.1145\/3178876.3186081-17","unstructured":"GitHub. 2017. GitHub. (2017). https:\/\/github.com\/"},{"key":"key-10.1145\/3178876.3186081-18","unstructured":"Xavier Glorot and Yoshua Bengio. 2010. Understanding the difficulty of training deep feedforward neural networks Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics. 249--256."},{"key":"key-10.1145\/3178876.3186081-19","unstructured":"Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016. Deep Learning. MIT Press. http:\/\/www.deeplearningbook.org"},{"key":"key-10.1145\/3178876.3186081-20","unstructured":"Karl Moritz Hermann, Tomas Kocisky, Edward Grefenstette, Lasse Espeholt, Will Kay, Mustafa Suleyman, and Phil Blunsom. 2015. Teaching machines to read and comprehend. In NIPS. 1693--1701."},{"key":"key-10.1145\/3178876.3186081-21","unstructured":"Srinivasan Iyer, Ioannis Konstas, Alvin Cheung, and Luke Zettlemoyer. 2016. Summarizing source code using a neural attention model ACL, Vol. 1. 2073--2083."},{"key":"key-10.1145\/3178876.3186081-22","doi-asserted-by":"crossref","unstructured":"Iman Keivanloo, Juergen Rilling, and Ying Zou. 2014. Spotting working code examples. In ICSE. ACM, 664--675.","DOI":"10.1145\/2568225.2568292"},{"key":"key-10.1145\/3178876.3186081-23","unstructured":"Diederik Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"key-10.1145\/3178876.3186081-24","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. ImageNet Classification with Deep Convolutional Neural Networks. In NIPS, bibfieldeditorF. Pereira, C. J. C. Burges, L. Bottou, and K. Q. Weinberger (Eds.). Curran Associates, Inc., 1097--1105."},{"key":"key-10.1145\/3178876.3186081-25","unstructured":"Jiwei Li, Minh-Thang Luong, and Dan Jurafsky. 2015. A hierarchical neural autoencoder for paragraphs and documents. arXiv preprint arXiv:1506.01057 (2015)."},{"key":"key-10.1145\/3178876.3186081-26","doi-asserted-by":"crossref","unstructured":"Wang Ling, Edward Grefenstette, Karl Moritz Hermann, Tom&#225;vs Kovcisk&#7923;, Andrew Senior, Fumin Wang, and Phil Blunsom. 2016. Latent predictor networks for code generation. arXiv preprint arXiv:1603.06744 (2016).","DOI":"10.18653\/v1\/P16-1057"},{"key":"key-10.1145\/3178876.3186081-27","doi-asserted-by":"crossref","unstructured":"Edward Loper and Steven Bird. 2002. NLTK: The Natural Language Toolkit. In Proceedings of the ACL-02 Workshop on Effective Tools and Methodologies for Teaching Natural Language Processing and Computational Linguistics - Volume 1 (ETMTNLP '02). Association for Computational Linguistics, Stroudsburg, PA, USA, 63--70. https:\/\/doi.org\/10.3115\/1118108.1118117","DOI":"10.3115\/1118108.1118117"},{"key":"key-10.1145\/3178876.3186081-28","doi-asserted-by":"crossref","unstructured":"Pablo Loyola, Edison Marrese-Taylor, and Yutaka Matsuo. 2017. A Neural Architecture for Generating Natural Language Descriptions from Source Code Changes. arXiv preprint arXiv:1704.04856 (2017).","DOI":"10.18653\/v1\/P17-2045"},{"key":"key-10.1145\/3178876.3186081-29","doi-asserted-by":"crossref","unstructured":"Minh-Thang Luong, Hieu Pham, and Christopher D. Manning. 2015. Effective Approaches to Attention-based Neural Machine Translation EMNLP.","DOI":"10.18653\/v1\/D15-1166"},{"key":"key-10.1145\/3178876.3186081-30","unstructured":"Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg S Corrado, and Jeff Dean. 2013. Distributed representations of words and phrases and their compositionality NIPS. 3111--3119."},{"key":"key-10.1145\/3178876.3186081-31","doi-asserted-by":"crossref","unstructured":"Lili Mou, Ge Li, Lu Zhang, Tao Wang, and Zhi Jin. 2016. Convolutional neural networks over tree structures for programming language processing AAAI.","DOI":"10.1609\/aaai.v30i1.10139"},{"key":"key-10.1145\/3178876.3186081-32","unstructured":"Seyed Mehdi Nasehi, Jonathan Sillito, Frank Maurer, and Chris Burns. 2012. What makes a good code example?: A study of programming Q&#38;A in StackOverflow Software Maintenance (ICSM), 2012 28th IEEE International Conference on. IEEE, 25--34."},{"key":"key-10.1145\/3178876.3186081-33","doi-asserted-by":"crossref","unstructured":"Anh Tuan Nguyen and Tien N Nguyen. 2015. Graph-based statistical language model for code. In Proceedings of the 37th International Conference on Software Engineering-Volume 1. IEEE Press, 858--868.","DOI":"10.1109\/ICSE.2015.336"},{"key":"key-10.1145\/3178876.3186081-34","unstructured":"Kamal Nigam and Rayid Ghani. 2000. Analyzing the effectiveness and applicability of co-training CIKM. ACM, 86--93."},{"key":"key-10.1145\/3178876.3186081-35","unstructured":"Yusuke Oda, Hiroyuki Fudaba, Graham Neubig, Hideaki Hata, Sakriani Sakti, Tomoki Toda, and Satoshi Nakamura. 2015. Learning to generate pseudo-code from source code using statistical machine translation (t). In Automated Software Engineering (ASE), 2015 30th IEEE\/ACM International Conference on. IEEE, 574--584."},{"key":"key-10.1145\/3178876.3186081-36","unstructured":"Stack Overflow. 2017 a. How do I find a 'gap' in running counter with SQL? (2017). https:\/\/stackoverflow.com\/a\/1312137\/4941215"},{"key":"key-10.1145\/3178876.3186081-37","unstructured":"Stack Overflow. 2017 b. How to find a gap in range in SQL. (2017). https:\/\/stackoverflow.com\/a\/17782635\/4941215"},{"key":"key-10.1145\/3178876.3186081-38","unstructured":"Stack Overflow. 2017 c. How to limit a number to be within a specified range? (Python). (2017). https:\/\/stackoverflow.com\/a\/5996949\/4941215"},{"key":"key-10.1145\/3178876.3186081-39","unstructured":"Stack Overflow. 2017 d. Splitting a dataframe based on column values. (2017). https:\/\/stackoverflow.com\/a\/33973304\/4941215"},{"key":"key-10.1145\/3178876.3186081-40","unstructured":"Stack Overflow. 2017 e. Stack Overflow. (2017). https:\/\/stackoverflow.com\/"},{"key":"key-10.1145\/3178876.3186081-41","unstructured":"F. Pedregosa, G. Varoquaux, A. Gramfort, V. Michel, B. Thirion, O. Grisel, M. Blondel, P. Prettenhofer, R. Weiss, V. Dubourg, J. Vanderplas, A. Passos, D. Cournapeau, M. Brucher, M. Perrot, and E. Duchesnay. 2011. Scikit-learn: Machine Learning in Python. Journal of Machine Learning Research Vol. 12 (2011), 2825--2830."},{"key":"key-10.1145\/3178876.3186081-42","doi-asserted-by":"crossref","unstructured":"Jeffrey Pennington, Richard Socher, and Christopher D Manning. 2014. Glove: Global Vectors for Word Representation.. In EMNLP, Vol. Vol. 14. 1532--1543.","DOI":"10.3115\/v1\/D14-1162"},{"key":"key-10.1145\/3178876.3186081-43","unstructured":"Rashmi Prasad, Nikhil Dinesh, Alan Lee, Eleni Miltsakaki, Livio Robaldo, Aravind Joshi, and Bonnie Webber. 2008. The Penn Discourse TreeBank 2.0. In In Proceedings of LREC."},{"key":"key-10.1145\/3178876.3186081-44","doi-asserted-by":"crossref","unstructured":"Maxim Rabinovich, Mitchell Stern, and Dan Klein. 2017. Abstract Syntax Networks for Code Generation and Semantic Parsing ACL.","DOI":"10.18653\/v1\/P17-1105"},{"key":"key-10.1145\/3178876.3186081-45","doi-asserted-by":"crossref","unstructured":"Mukund Raghothaman, Yi Wei, and Youssef Hamadi. 2016. SWIM: synthesizing what I mean: code search and idiomatic snippet synthesis ICSE. ACM, 357--367.","DOI":"10.1145\/2884781.2884808"},{"key":"key-10.1145\/3178876.3186081-46","unstructured":"Alexander J Ratner, Christopher M De Sa, Sen Wu, Daniel Selsam, and Christopher R&#233;. 2016. Data programming: Creating large training sets, quickly NIPS. 3567--3575."},{"key":"key-10.1145\/3178876.3186081-47","unstructured":"David E Rumelhart, Geoffrey E Hinton, Ronald J Williams, et al. 1988. Learning representations by back-propagating errors. Cognitive modeling Vol. 5, 3 (1988), 1."},{"key":"key-10.1145\/3178876.3186081-48","doi-asserted-by":"crossref","unstructured":"Iulian Vlad Serban, Alessandro Sordoni, Yoshua Bengio, Aaron C Courville, and Joelle Pineau. 2016. Building End-To-End Dialogue Systems Using Generative Hierarchical Neural Network Models.. In AAAI. 3776--3784.","DOI":"10.1609\/aaai.v30i1.9883"},{"key":"key-10.1145\/3178876.3186081-49","unstructured":"Yelong Shen, Xiaodong He, Jianfeng Gao, Li Deng, and Gr&#233;goire Mesnil. 2014. A latent semantic model with convolutional-pooling structure for information retrieval CIKM. ACM, 101--110."},{"key":"key-10.1145\/3178876.3186081-50","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"key-10.1145\/3178876.3186081-51","unstructured":"Inc Stack Exchange. 2017. Stack Exchange Data Dump. (2017). https:\/\/archive.org\/details\/stackexchange"},{"key":"key-10.1145\/3178876.3186081-52","doi-asserted-by":"crossref","unstructured":"Yu Su, Ahmed Hassan Awadallah, Madian Khabsa, Patrick Pantel, and Michael Gamon. 2017. Building Natural Language Interfaces to Web APIs, In CIKM.","DOI":"10.1145\/3132847.3133009"},{"key":"key-10.1145\/3178876.3186081-53","unstructured":"Christian Szegedy, Alexander Toshev, and Dumitru Erhan. 2013. Deep neural networks for object detection. In NIPS. 2553--2561."},{"key":"key-10.1145\/3178876.3186081-54","doi-asserted-by":"crossref","unstructured":"Duyu Tang, Bing Qin, and Ting Liu. 2015. Document Modeling with Gated Recurrent Neural Network for Sentiment Classification. EMNLP. 1422--1432.","DOI":"10.18653\/v1\/D15-1167"},{"key":"key-10.1145\/3178876.3186081-55","unstructured":"TensorFlow. 2017. TensorFlow. (2017). https:\/\/www.tensorflow.org\/"},{"key":"key-10.1145\/3178876.3186081-56","doi-asserted-by":"crossref","unstructured":"Christoph Treude, Ohad Barzilay, and Margaret-Anne Storey. 2011. How do programmers ask and answer questions on the web?: Nier track ICSE. IEEE, 804--807.","DOI":"10.1145\/1985793.1985907"},{"key":"key-10.1145\/3178876.3186081-57","doi-asserted-by":"crossref","unstructured":"Venkatesh Vinayakarao, Anita Sarma, Rahul Purandare, Shuktika Jain, and Saumya Jain. 2017. Anne: Improving source code search using entity retrieval approach Proceedings of the Tenth ACM International Conference on Web Search and Data Mining. ACM, 211--220.","DOI":"10.1145\/3018661.3018691"},{"key":"key-10.1145\/3178876.3186081-58","unstructured":"Ellen M Voorhees et al. 1999. The TREC-8 Question Answering Track Report.. In Trec, Vol. Vol. 99. 77--82."},{"key":"key-10.1145\/3178876.3186081-59","doi-asserted-by":"crossref","unstructured":"Di Yang, Aftab Hussain, and Cristina Videira Lopes. 2016 a. From query to usable code: an analysis of stack overflow code snippets Proceedings of the 13th International Workshop on Mining Software Repositories. ACM, 391--402.","DOI":"10.1145\/2901739.2901767"},{"key":"key-10.1145\/3178876.3186081-60","doi-asserted-by":"crossref","unstructured":"Zichao Yang, Diyi Yang, Chris Dyer, Xiaodong He, Alex Smola, and Eduard Hovy. 2016 b. Hierarchical attention networks for document classification Proceedings of NAACL-HLT. 1480--1489.","DOI":"10.18653\/v1\/N16-1174"},{"key":"key-10.1145\/3178876.3186081-61","unstructured":"Pengcheng Yin and Graham Neubig. 2017. A Syntactic Neural Model for General-Purpose Code Generation ACL. Vancouver, Canada."},{"key":"key-10.1145\/3178876.3186081-62","unstructured":"Annie T. T. Ying. 2015. Mining Challenge 2015: Comparing and combining different information sources on the Stack Overflow data set. In The 12th Working Conference on Mining Software Repositories. to appear."},{"key":"key-10.1145\/3178876.3186081-63","doi-asserted-by":"crossref","unstructured":"Zhi-Hua Zhou and Ming Li. 2005. Tri-training: Exploiting unlabeled data using three classifiers. IEEE Transactions on knowledge and Data Engineering Vol. 17, 11 (2005), 1529--1541.","DOI":"10.1109\/TKDE.2005.186"},{"key":"key-10.1145\/3178876.3186081-64","doi-asserted-by":"crossref","unstructured":"Meital Zilberstein and Eran Yahav. 2016. Leveraging a corpus of natural language descriptions for program similarity. In Proceedings of the 2016 ACM International Symposium on New Ideas, New Paradigms, and Reflections on Programming and Software. ACM, 197--211.","DOI":"10.1145\/2986012.2986013"}],"event":{"name":"the 2018 World Wide Web Conference","location":"Lyon, France","acronym":"WWW '18","number":"2018","sponsor":["SIGWEB, ACM Special Interest Group on Hypertext, Hypermedia, and Web","IW3C2, International World Wide Web Conference Committee"],"start":{"date-parts":[[2018,4,23]]},"end":{"date-parts":[[2018,4,27]]}},"container-title":["Proceedings of the 2018 World Wide Web Conference on World Wide Web - WWW '18"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3178876.3186081","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3186081&ftid=1957404&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:11:28Z","timestamp":1750212688000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3178876.3186081"}},"subtitle":["A Systematically Mined Question-Code Dataset from Stack Overflow"],"proceedings-subject":"World Wide Web","short-title":[],"issued":{"date-parts":[[2018]]},"references-count":64,"URL":"https:\/\/doi.org\/10.1145\/3178876.3186081","relation":{},"subject":[],"published":{"date-parts":[[2018]]}}}