Script to replace contents inside the files
Kunal Jamdade
kunal123jamdade at gmail.com
Mon Jul 24 00:57:25 EDT 2017
I have thousands of HTML files inside a folder. I want to replace each
file name wherever it appears inside the other files. For example, if the
file name 'abcd1234.html' is found inside another file, say file2.html, then
I want to remove the last 4 digits of the file name, i.e. 'abcd1234.html' =>
'abcd.htm'.
I have written a script, but any suggestions on it are welcome.
Regards,
Kunal
-------------- next part --------------
import os
import re
def script_to_create_folder(
        path=r'D:\macrocodesrequired\Testing_Script\Real_testing_\New folder\brpt'):
    """Collect eligible files under *path* and rewrite file-name references in them.

    The tree below *path* is walked once. Files in excluded folders, or whose
    own name contains an excluded word, are ignored. Every remaining file is
    then scanned for every remaining file name by calling
    ``find_filename_inside_files(name, file_path)``.

    Args:
        path: Root folder to walk. Defaults to the folder that used to be
            hard-coded in this function.

    Returns:
        None.
    """
    # Substrings are matched case-sensitively, exactly as spelled here.
    excluded_dir_parts = (
        'About', 'Community', 'support', 'home', 'Products', 'service',
        'solutions', 'training', 'wheretobuy',
    )
    excluded_name_parts = (
        'default', 'index', 'category', 'Category', 'Default', 'Index',
        'home', 'support',
    )

    path_list = []
    filename_list = []
    for root, _dirs, names in os.walk(path):
        # Test the folder relative to the start path so that the start path's
        # own name (e.g. something under ".../home/...") cannot exclude
        # the whole tree.
        relative_root = os.path.relpath(root, path)
        # The folder test used to be `if ...: pass` followed by a separate
        # `if`, so it never excluded anything; skip excluded folders for real.
        if any(part in relative_root for part in excluded_dir_parts):
            continue
        for nm in names:
            if any(part in nm for part in excluded_name_parts):
                continue
            filename_list.append(nm)
            path_list.append(os.path.join(root, nm))

    # Every eligible file is checked against every eligible file name.
    for file_path in path_list:
        for name in filename_list:
            find_filename_inside_files(name, file_path)
def find_filename_inside_files(file_name, dir_path):
pattern_list = ['\d+$', '\d+\w$', '\d+-\d$', '\w\d+$', '\d\w\d\w', '\w\d+$', '\w\d\w\d']
data = []
replace_str = ''
read_cnt = 0
digits_to_replace = 0
with open(dir_path, 'r', encoding='utf-8') as file_handle:
data = file_handle.read()
#print(data)
if file_name in data:
#print(file_name)
for search_pattern in pattern_list:
read_cnt = 0
if '-' in file_name:
#print("===>",search_pattern)
if re.search(search_pattern, file_name.split('.')[0]):
digits_to_replace = filename_with_hypen(file_name, search_pattern)
read_cnt = 1
position = file_handle.tell()
replace_str = replace_oldstring_newstring(
data,
file_name,
digits_to_replace
)
# file_handle.seek(0, 0)
# file_handle.write(replace_str)
elif re.search(search_pattern, file_name.split('.')[0]):
digits_to_replace = filename_without_hypen(file_name, search_pattern)
read_cnt = 1
replace_str = replace_oldstring_newstring(data, file_name, digits_to_replace)
if read_cnt == 1:
#print("write to")
print(file_name)
print(dir_path)
with open(dir_path, 'w', encoding='utf-8') as file_out:
file_out.write(replace_str)
exit()
def filename_without_hypen(file_name, pattern):
    """Return the negated length of the text *pattern* matches in the file stem.

    The stem is the part of *file_name* before its first '.'.

    Args:
        file_name: File name such as 'abcd1234.html'.
        pattern: Regular expression searched for in the stem.

    Returns:
        ``-len(matched_text)`` when the pattern matches a non-empty string,
        otherwise None (no match, or an empty match).
    """
    stem = file_name.split('.')[0]
    match = re.search(pattern, stem)
    if not match:
        return None
    matched_text = match.group()
    # The former `> 2` and `0 < len <= 3` branches overlapped and returned the
    # same value; together they covered every non-empty match.
    # NOTE(review): for patterns not anchored with `$` the match need not be
    # at the end of the stem, so the negated length may not select the
    # matched characters when used as a slice start - confirm with callers.
    return -len(matched_text) if matched_text else None
def filename_with_hypen(file_name, pattern):
    """Return the negated length of the text *pattern* matches in the file stem.

    The stem is the part of *file_name* before its first '.'. The caller in
    this file uses this variant for names that contain a hyphen.

    Args:
        file_name: File name such as 'abc12-3.html'.
        pattern: Regular expression searched for in the stem.

    Returns:
        ``-len(matched_text)`` when the pattern matches, otherwise None.
    """
    match = re.search(pattern, file_name.split('.')[0])
    if not match:
        return None
    # The former '-2' / '-3' check led to the same return value in both
    # branches, so it is collapsed into a single return.
    return -len(match.group())
def replace_oldstring_newstring(data, filename, last_digits_to_replace):
    """Return *data* with every occurrence of *filename* replaced by its short form.

    The short form is *filename* with the trailing characters selected by
    ``stem[last_digits_to_replace:]`` removed from its stem (the part before
    the first '.'), and with '.html' turned into '.htm'. For example
    'abcd1234.html' with ``last_digits_to_replace=-4`` becomes 'abcd.htm'.

    Only occurrences of *filename* itself are changed. Previously the digit
    suffix was deleted wherever it appeared in *data* and every '.html' in
    *data* became '.htm', which corrupted unrelated text and made other
    '.html' names impossible to find on later passes.

    Args:
        data: Full text of the document being rewritten.
        filename: File name to look for in *data*.
        last_digits_to_replace: Negative slice start selecting the trailing
            part of the stem to drop (e.g. -4 drops the last four characters).

    Returns:
        The rewritten text.

    Raises:
        ValueError: If *filename* does not occur in *data*.
    """
    print("in replace")
    if filename not in data:
        # Same exception the earlier data.index(filename) lookup produced.
        raise ValueError('substring not found')

    stem, dot, extension = filename.partition('.')
    suffix = stem[last_digits_to_replace:]
    trimmed_stem = stem[:len(stem) - len(suffix)]
    new_name = (trimmed_stem + dot + extension).replace('.html', '.htm')
    return data.replace(filename, new_name)
def main():
    """Run the script by calling ``script_to_create_folder()``."""
    script_to_create_folder()
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
More information about the Python-list
mailing list