diff --git a/Data/Database/db_repository.py b/Data/Database/db_repository.py index ffc3022535b9a58e2960b25c1bd72e023f565e32..8fff2f717c8c2ba6d663020f85af7ad8c766bc23 100644 --- a/Data/Database/db_repository.py +++ b/Data/Database/db_repository.py @@ -11,6 +11,15 @@ from Data.Database.constants import MYSQL_CONFIG class DBRepository: """Class that abstracts DB operations""" + initial_commits = [ + # Linux + "1da177e4c3f41524e886b7f1b8a0c1fc7321cac2", + # Chromium + "09911bf300f1a419907a9412154760efd0b7abc3", + # Chromium + "a814a8d55429605fe6d7045045cd25b6bf624580", + ] + def __init__(self): self._open() @@ -475,7 +484,6 @@ class DBRepository: cursor = self.cnx.cursor() try: - # cursor.execute("SET FOREIGN_KEY_CHECKS = 0;") # Update the mapping if it exists update = ( "UPDATE `link_fixing_commit_vcc` " @@ -494,7 +502,6 @@ class DBRepository: "mapping_id": mapping_id, }, ) - # cursor.execute("SET FOREIGN_KEY_CHECKS = 1;") self.cnx.commit() except mysql.connector.Error as err: @@ -585,29 +592,35 @@ class DBRepository: return result - def remove_linux_initial_commit(self): + def remove_initial_commits(self): """ - Removes the initial commit of the Linux kernel from the database. + Removes the initial commits from the database. A significant portion of commits in the data point to the Linux kernel's initial commit. This initial commit comprises over 6 million changes across more than 17 thousand files, rendering it unsuitable for use as a practical VCC. Consequently, remove this commit from the database where it was utilized. + This also applies to the initial commit of the Chromium project, which likewise comprises a very large number of changes. + In general, this removes all initial commits that are defined in the ``initial_commits`` list. 
""" if not self.cnx.is_connected(): self._open() cursor = self.cnx.cursor() - initial_commit = "1da177e4c3f41524e886b7f1b8a0c1fc7321cac2" + # Collect all configured initial-commit SHAs as a tuple of query parameters + initial_commits = tuple(i for i in self.initial_commits) try: # We have to disable foreign key checks to be able to update the table cursor.execute("SET FOREIGN_KEY_CHECKS = 0;") update_vcc = ( - "UPDATE `link_fixing_commit_vcc` SET `vcc_sha` = NULL, `determined_by_heuristic` = TRUE " - "WHERE `vcc_sha` = %(initial_commit)s " + "UPDATE `link_fixing_commit_vcc` SET `vcc_sha` = NULL, `determined_by_heuristic` = TRUE WHERE" + f"{'`vcc_sha` = %s OR ' * (len(initial_commits) - 1)} `vcc_sha` = %s;" + ) + cursor.execute( + update_vcc, + initial_commits, ) - cursor.execute(update_vcc, {"initial_commit": initial_commit}) cursor.execute("SET FOREIGN_KEY_CHECKS = 1;") self.cnx.commit() except mysql.connector.Error as err: diff --git a/Data/Utils/CleanupUtils.py b/Data/Utils/CleanupUtils.py index 76cc2489825850021a5b846b902caa56a3bdb72d..c3f0c3a5f98fc35656e80101d6777fbeffcc2d64 100644 --- a/Data/Utils/CleanupUtils.py +++ b/Data/Utils/CleanupUtils.py @@ -3,7 +3,7 @@ from Data.Database.db_repository import DBRepository def main(): db_repo = DBRepository() - db_repo.remove_linux_initial_commit() + db_repo.remove_initial_commits() if __name__ == "__main__":