Skip to content
Snippets Groups Projects
Commit 006ee9a7 authored by Raphael Maas's avatar Raphael Maas
Browse files

updated file remover

parent a9817790
Branches
Tags v1.14.3
No related merge requests found
......@@ -6,8 +6,46 @@ Created on Thu Jan 9 22:46:19 2020
"""
import glob
import os
from shutil import copy2
# Paths of the .txt files in the sample download folder whose names contain
# "(1)". Evaluated once, when the module is loaded.
# NOTE(review): presumably "(1)" marks a re-downloaded copy of a filing, and
# nothing in the visible code reads this list - confirm it is still needed.
list_of_all_doublefiles = glob.glob(r"downloads_sample\13D_filings_dirty\*(1)*.txt")
def write_nr_of_duplicates_to_file():
    r"""Write a duplicate-count report to ``downloads\Duplicates_13D.txt``.

    A file counts as a duplicate when its name matches ``*(1)*.txt`` inside
    ``downloads\13D_filings_dirty``. The report contains the number of
    duplicates, the total number of ``.txt`` files, the share of duplicates
    in percent and finally one duplicate file name per line.

    When the folder holds no ``.txt`` files at all, the percentage is
    reported as ``0.0`` instead of raising ``ZeroDivisionError`` and leaving
    a truncated report behind.
    """
    list_of_all_duplicates = glob.glob(r"downloads\13D_filings_dirty\*(1)*.txt")
    list_of_all_files = glob.glob(r"downloads\13D_filings_dirty\*.txt")
    nr_of_duplicates = len(list_of_all_duplicates)
    nr_of_all_files = len(list_of_all_files)

    # Guard the division: an empty input folder must still yield a report.
    if nr_of_all_files:
        percentage = nr_of_duplicates / nr_of_all_files * 100.00
    else:
        percentage = 0.0

    # newline="" disables newline translation, so every explicit "\r\n" is
    # written exactly as given (text mode would otherwise emit "\r\r\n" on
    # Windows).
    with open(r"downloads\Duplicates_13D.txt", "w", newline="") as file:
        file.write(f"--Duplicate 13D files--\r\nNumber of duplicates: {nr_of_duplicates}\r\n")
        file.write(f"Number of total files: {nr_of_all_files}\r\n")
        # NOTE(review): "dupblicates" is misspelled; the label is kept as-is
        # in case something matches on it.
        file.write(f"Percentage of dupblicates: {percentage} %\r\n\r\n")
        for duplicate in list_of_all_duplicates:
            # Keep only the file name, dropping the folder part of the path.
            file.write(duplicate.split("\\")[-1] + "\r\n")
# stage 1
def copy_without_duplicates():
    r"""Copy every non-duplicate filing into ``downloads\13D_filings_clean``.

    All ``.txt`` files in ``downloads\13D_filings_dirty`` are visited; a file
    is skipped when its path contains ``(1)``. The running index of each
    visited file is printed as a progress indicator.

    The destination folder is created when it does not exist yet.
    """
    target_dir = r"downloads\13D_filings_clean"
    # copy2 treats a non-existent destination as a *file* name, so without
    # the folder every copy would overwrite the previous one.
    os.makedirs(target_dir, exist_ok=True)

    all_files = glob.glob(r"downloads\13D_filings_dirty\*.txt")
    for index, file in enumerate(all_files):
        print(str(index))
        if "(1)" not in file:
            copy2(file, target_dir)
# stage 2
def copy_without_empty_files():
    r"""Copy filings with real content into ``downloads\13D_filings_stage2``.

    Every ``.txt`` file in ``downloads_sample\13D_filings_stage1`` is read.
    A file is copied when its text contains ``Item`` or
    ``Identity and Background``; otherwise its file name is appended to the
    report ``downloads\EmptyFiles_13D.txt``. The running index of each
    visited file is printed as a progress indicator.

    The destination folder is created when it does not exist yet.
    """
    # NOTE(review): the input comes from "downloads_sample" while both
    # outputs go to "downloads" - confirm this mix is intended.
    target_dir = r"downloads\13D_filings_stage2"
    # copy2 treats a non-existent destination as a *file* name, so without
    # the folder every copy would overwrite the previous one.
    os.makedirs(target_dir, exist_ok=True)

    all_files = glob.glob(r"downloads_sample\13D_filings_stage1\*.txt")
    # newline="" disables newline translation, so every explicit "\r\n" is
    # written exactly as given (text mode would otherwise emit "\r\r\n" on
    # Windows).
    with open(r"downloads\EmptyFiles_13D.txt", "w", newline="") as report_file:
        report_file.write("--Empty 13D files--\r\n")
        for index, file in enumerate(all_files):
            # `file` is a path string, so it has to be opened before reading.
            # Undecodable bytes are replaced: only a substring test follows.
            with open(file, "r", errors="replace") as source:
                content = source.read()
            print(str(index))
            if "Item" in content or "Identity and Background" in content:
                copy2(file, target_dir)
            else:
                # Keep only the file name, dropping the folder part.
                report_file.write(file.split("\\")[-1] + "\r\n")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment