{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T16:16:07Z","timestamp":1774023367999,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,6,29]],"date-time":"2020-06-29T00:00:00Z","timestamp":1593388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,6,29]]},"DOI":"10.1145\/3379597.3387461","type":"proceedings-article","created":{"date-parts":[[2020,9,19]],"date-time":"2020-09-19T02:12:49Z","timestamp":1600481569000},"page":"32-42","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":42,"title":["A Machine Learning Approach for Vulnerability Curation"],"prefix":"10.1145","author":[{"given":"Yang","family":"Chen","sequence":"first","affiliation":[{"name":"Veracode"}]},{"given":"Andrew E.","family":"Santosa","sequence":"additional","affiliation":[{"name":"Veracode"}]},{"given":"Ang Ming","family":"Yi","sequence":"additional","affiliation":[{"name":"Veracode"}]},{"given":"Abhishek","family":"Sharma","sequence":"additional","affiliation":[{"name":"Veracode"}]},{"given":"Asankhaya","family":"Sharma","sequence":"additional","affiliation":[{"name":"Veracode"}]},{"given":"David","family":"Lo","sequence":"additional","affiliation":[{"name":"Singapore Management University"}]}],"member":"320","published-online":{"date-parts":[[2020,9,18]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Black Duck Software Composition Analysis. https:\/\/www.synopsys.com\/software- integrity\/security- testing\/software- composition- analysis.html.  [n.d.]. Black Duck Software Composition Analysis. https:\/\/www.synopsys.com\/software- integrity\/security- testing\/software- composition- analysis.html."},{"key":"e_1_3_2_1_2_1","unstructured":"[n.d.]. gensim: Topic Modelling for Humans. https:\/\/radimrehurek.com\/gensim\/index.html.  [n.d.]. gensim: Topic Modelling for Humans. https:\/\/radimrehurek.com\/gensim\/index.html."},{"key":"e_1_3_2_1_3_1","unstructured":"[n.d.]. NVD - Home. https:\/\/nvd.nist.gov\/.  [n.d.]. NVD - Home. https:\/\/nvd.nist.gov\/."},{"key":"e_1_3_2_1_4_1","unstructured":"[n.d.]. rough-auditing-tool-for-security. https:\/\/code.google.com\/archive\/p\/rough-auditing-tool-for-security\/.  [n.d.]. rough-auditing-tool-for-security. https:\/\/code.google.com\/archive\/p\/rough-auditing-tool-for-security\/."},{"key":"e_1_3_2_1_5_1","unstructured":"[n.d.]. scikit-learn: Machine Learning in Python. http:\/\/scikit-learn.org\/stable\/.  [n.d.]. scikit-learn: Machine Learning in Python. http:\/\/scikit-learn.org\/stable\/."},{"key":"e_1_3_2_1_6_1","unstructured":"[n.d.]. Software Composition Analysis. https:\/\/www.flexera.com\/products\/software-composition-analysis.  [n.d.]. Software Composition Analysis. https:\/\/www.flexera.com\/products\/software-composition-analysis."},{"key":"e_1_3_2_1_7_1","unstructured":"[n.d.]. Software Composition Analysis --- Veracode. https:\/\/www.veracode.com\/products\/software-composition-analysis.  [n.d.]. Software Composition Analysis --- Veracode. https:\/\/www.veracode.com\/products\/software-composition-analysis."},{"key":"e_1_3_2_1_8_1","unstructured":"[n.d.]. Vulnerability Scanner. https:\/\/www.sonatype.com\/appscan.  [n.d.]. Vulnerability Scanner. https:\/\/www.sonatype.com\/appscan."},{"key":"e_1_3_2_1_9_1","unstructured":"2019. [Security] Bump tar from 4.4.1 to 4.4.13. https:\/\/github.com\/bevry\/extendr\/commit\/306cab9a9816f137ac763b8f5ee702a67296bb65.  2019. [Security] Bump tar from 4.4.1 to 4.4.13. https:\/\/github.com\/bevry\/extendr\/commit\/306cab9a9816f137ac763b8f5ee702a67296bb65."},{"key":"e_1_3_2_1_10_1","unstructured":"2020. Use HTTPS to resolve dependencies in Maven Build. https:\/\/issues.apache.org\/jira\/browse\/GORA-642.  2020. Use HTTPS to resolve dependencies in Maven Build. https:\/\/issues.apache.org\/jira\/browse\/GORA-642."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"A. Blum and T. M. Mitchell. 1998. Combining Labeled and Unlabeled Data with Co-Training. In 11th COLT. ACM 92--100.  A. Blum and T. M. Mitchell. 1998. Combining Labeled and Unlabeled Data with Co-Training. In 11th COLT. ACM 92--100.","DOI":"10.1145\/279943.279962"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"O. Chapelle B. Sch\u00f6lkopf and A. Zien. 2006. Semi-Supervised Learning. MIT Press.  O. Chapelle B. Sch\u00f6lkopf and A. Zien. 2006. Semi-Supervised Learning. MIT Press.","DOI":"10.7551\/mitpress\/9780262033589.001.0001"},{"key":"e_1_3_2_1_13_1","volume-title":"MaL TeSQuE '17","author":"Chappelly T."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"T. Chen and C. Guestrin. 2016. XGBoost: A Scalable Tree Boosting System. In 22nd SIGKDD. ACM 785--794.  T. Chen and C. Guestrin. 2016. XGBoost: A Scalable Tree Boosting System. In 22nd SIGKDD. ACM 785--794.","DOI":"10.1145\/2939672.2939785"},{"key":"e_1_3_2_1_15_1","volume-title":"ICSE-SEIP '20","author":"Chen Y."},{"key":"e_1_3_2_1_16_1","volume-title":"23rd ICML (ACM International Conference Proceeding Series)","volume":"148","author":"Davis J."},{"key":"e_1_3_2_1_17_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR abs\/1810.04805","author":"Devlin J.","year":"2018"},{"key":"e_1_3_2_1_18_1","unstructured":"A. V. Dorogush V. Ershov and A. Gulin. 2018. CatBoost: Gradient boosting with categorical features support. CoRR abs\/1810.11363 (2018).  A. V. Dorogush V. Ershov and A. Gulin. 2018. CatBoost: Gradient boosting with categorical features support. CoRR abs\/1810.11363 (2018)."},{"key":"e_1_3_2_1_19_1","unstructured":"D. Foo J. Yeo X. Hao and A. Sharma. 2019. The Dynamics of Software Composition Analysis. CoRR abs\/1909.00973 (2019). arXiv:1909.00973  D. Foo J. Yeo X. Hao and A. Sharma. 2019. The Dynamics of Software Composition Analysis. CoRR abs\/1909.00973 (2019). arXiv:1909.00973"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"S. M. Ghaffarian and H. R. Shahriari. 2017. Software Vulnerability Analysis and Discovery Using Machine-Learning and Data-Mining Techniques: A Survey. ACM Comput. Surv. 50 4 (2017) 56:1-56:36.  S. M. Ghaffarian and H. R. Shahriari. 2017. Software Vulnerability Analysis and Discovery Using Machine-Learning and Data-Mining Techniques: A Survey. ACM Comput. Surv. 50 4 (2017) 56:1-56:36.","DOI":"10.1145\/3092566"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"G. Grieco G. L. Grinblat L. C. Uzal S. Rawat J. Feist and L. Mounier. 2016. Toward Large-Scale Vulnerability Discovery using Machine Learning. In 6th CODASPY. ACM 85--96.  G. Grieco G. L. Grinblat L. C. Uzal S. Rawat J. Feist and L. Mounier. 2016. Toward Large-Scale Vulnerability Discovery using Machine Learning. In 6th CODASPY. ACM 85--96.","DOI":"10.1145\/2857705.2857720"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2016.12.035"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2008.239"},{"key":"e_1_3_2_1_24_1","volume-title":"FSE '19","author":"Jimenez M."},{"key":"e_1_3_2_1_25_1","first-page":"3146","article-title":"LightGBM: A Highly Efficient Gradient Boosting Decision Tree","volume":"30","author":"Ke G.","year":"2017","journal-title":"NIPS"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"A. Meneely H. Srinivasan A. Musa A. R. Tejeda M. Mokary and B. Spates. 2013. When a Patch Goes Bad: Exploring the Properties of Vulnerability-Contributing Commits. In ESEM 13. IEEE Comp. Soc. 65--74.  A. Meneely H. Srinivasan A. Musa A. R. Tejeda M. Mokary and B. Spates. 2013. When a Patch Goes Bad: Exploring the Properties of Vulnerability-Contributing Commits. In ESEM 13. IEEE Comp. Soc. 65--74.","DOI":"10.1109\/ESEM.2013.19"},{"key":"e_1_3_2_1_27_1","unstructured":"T. Mikolov K. Chen G. Corrado and J. Dean. 2013. Efficient Estimation of Word Representations in Vector Space. CoRR abs\/1301.3781 (2013). arXiv:1301.3781  T. Mikolov K. Chen G. Corrado and J. Dean. 2013. Efficient Estimation of Word Representations in Vector Space. CoRR abs\/1301.3781 (2013). arXiv:1301.3781"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"K. Nigam and R. Ghani. 2000. Analyzing the Effectiveness and Applicability of Co-training. In CIKM 00. ACM 86--93.  K. Nigam and R. Ghani. 2000. Analyzing the Effectiveness and Applicability of Co-training. In CIKM 00. ACM 86--93.","DOI":"10.1145\/354756.354805"},{"key":"e_1_3_2_1_29_1","volume-title":"AAAI '98","author":"Nigam K."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"H. Perl S. Dechand M. Smith D. Arp F. Yamaguchi K. Rieck S. Fahl and Y. Acar. 2015. VCCFinder: Finding Potential Vulnerabilities in Open-Source Projects to Assist Code Audits. In 22nd CCS. ACM 426--437.  H. Perl S. Dechand M. Smith D. Arp F. Yamaguchi K. Rieck S. Fahl and Y. Acar. 2015. VCCFinder: Finding Potential Vulnerabilities in Open-Source Projects to Assist Code Audits. In 22nd CCS. ACM 426--437.","DOI":"10.1145\/2810103.2813604"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"M. E. Peters M. Neumann M. Iyyer M. Gardner C. Clark K. Lee and L. Zettlemoyer. 2018. Deep contextualized word representations. CoRR abs\/1802.05365 (2018).  M. E. Peters M. Neumann M. Iyyer M. Gardner C. Clark K. Lee and L. Zettlemoyer. 2018. Deep contextualized word representations. CoRR abs\/1802.05365 (2018).","DOI":"10.18653\/v1\/N18-1202"},{"key":"e_1_3_2_1_32_1","first-page":"1","article-title":"Preliminary comparison of techniques for dealing with imbalance in software defect prediction. In 18th EASE","volume":"43","author":"Rodr\u00edguez D.","year":"2014","journal-title":"ACM"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"C. Rosenberg M. Hebert and H. Schneiderman. 2005. Semi-Supervised Self-Training of Object Detection Models. In 7th WACV\/MOTION. IEEE Comp. Soc. 29--36.  C. Rosenberg M. Hebert and H. Schneiderman. 2005. Semi-Supervised Self-Training of Object Detection Models. In 7th WACV\/MOTION. IEEE Comp. Soc. 29--36.","DOI":"10.1109\/ACVMOT.2005.107"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"A. Sabetta and M. Bezzi. 2018. A Practical Approach to the Automatic Classification of Security-Relevant Commits. In 34th ICSME. IEEE Comp. Soc.  A. Sabetta and M. Bezzi. 2018. A Practical Approach to the Automatic Classification of Security-Relevant Commits. In 34th ICSME. IEEE Comp. Soc.","DOI":"10.1109\/ICSME.2018.00058"},{"key":"e_1_3_2_1_35_1","unstructured":"V. Smolyakov. [n.d.]. Ensemble Learning to Improve Machine Learning Results. https:\/\/blog.statsbot.co\/ensemble-learning-d1dcd548e936.  V. Smolyakov. [n.d.]. Ensemble Learning to Improve Machine Learning Results. https:\/\/blog.statsbot.co\/ensemble-learning-d1dcd548e936."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0218001409007326"},{"key":"e_1_3_2_1_37_1","volume-title":"Approximating Attack Surfaces with Stack Traces. In 37th ICSE","volume":"2","author":"Theisen C."},{"key":"e_1_3_2_1_38_1","volume-title":"ICSE-SEIP '17","author":"Theisen C."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Y. Tian J. L. Lawall and D. Lo. 2012. Identifying Linux bug fixing patches. In 34th ICSE. IEEE 386--396.  Y. Tian J. L. Lawall and D. Lo. 2012. Identifying Linux bug fixing patches. In 34th ICSE. IEEE 386--396.","DOI":"10.1109\/ICSE.2012.6227176"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACSAC.2000.898880"},{"key":"e_1_3_2_1_41_1","volume-title":"Automated Vulnerability Detection System Based on Commit Messages. Master's thesis","author":"Wan L."},{"key":"e_1_3_2_1_42_1","volume-title":"IJCNN '10","author":"Wang S."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TR.2013.2259203"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/1007730.1007734"},{"key":"e_1_3_2_1_45_1","unstructured":"D. A. Wheeler. [n.d.]. Flawfinder Home Page. https:\/\/www.dwheeler.com\/flawfinder\/.  D. A. Wheeler. [n.d.]. Flawfinder Home Page. https:\/\/www.dwheeler.com\/flawfinder\/."},{"key":"e_1_3_2_1_46_1","volume-title":"5th","author":"Wijayasekara D."},{"key":"e_1_3_2_1_47_1","volume-title":"Vulnerability Extrapolation: Assisted Discovery of Vulnerabilities Using Machine Learning. In 5th WOOT. USENIX Assoc., 118--127.","author":"Yamaguchi F.","year":"2011"},{"key":"e_1_3_2_1_48_1","volume-title":"33rd ACL","author":"Yarowsky D."},{"key":"e_1_3_2_1_49_1","unstructured":"M. Zalewski. [n.d.]. American Fuzzy Lop (2.52b). http:\/\/lcamtuf.coredump.cx\/afl\/.  M. Zalewski. [n.d.]. American Fuzzy Lop (2.52b). http:\/\/lcamtuf.coredump.cx\/afl\/."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Y. Zhou and A. Sharma. 2017. Automated identification of security issues from commit messages and bug reports. In 11th FSE. ACM 914--919.  Y. Zhou and A. Sharma. 2017. Automated identification of security issues from commit messages and bug reports. In 11th FSE. ACM 914--919.","DOI":"10.1145\/3106237.3117771"}],"event":{"name":"MSR '20: 17th International Conference on Mining Software Repositories","location":"Seoul Republic of Korea","acronym":"MSR '20","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 17th International Conference on Mining Software Repositories"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3379597.3387461","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3379597.3387461","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:41:20Z","timestamp":1750200080000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3379597.3387461"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,29]]},"references-count":50,"alternative-id":["10.1145\/3379597.3387461","10.1145\/3379597"],"URL":"https:\/\/doi.org\/10.1145\/3379597.3387461","relation":{},"subject":[],"published":{"date-parts":[[2020,6,29]]},"assertion":[{"value":"2020-09-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}