Script to replace contents inside the files

Mon Jul 24 00:57:25 EDT 2017

I have thousands of HTML files inside a folder. I want to rewrite the
file names that are referenced inside other files. For example, if the file
name 'abcd1234.html' is found inside another file, say file2.html, then I want
to remove the last 4 digits of the file name, i.e. 'abcd1234.html' =>
'abcd.htm'.

I have tried writing a script, and your suggestions on it are welcome.

Regards,
Kunal
-------------- next part --------------
import os
import re

def script_to_create_folder(
    path=r'D:\macrocodesrequired\Testing_Script\Real_testing_\New folder\brpt',
):
    """Walk *path* and look for every collected file name inside every file.

    All files below *path* are collected, except those whose name contains
    one of the skipped fragments (default/index/category/home/support in the
    spellings listed below).  ``find_filename_inside_files`` is then called
    once for every (collected file, collected file name) pair.

    Args:
        path: Root folder to walk.  Defaults to the folder the script was
            originally hard-coded to, so calling with no arguments behaves
            as before.
    """
    # File names containing any of these fragments are never collected.
    skipped_name_parts = (
        'default', 'index', 'category', 'Category', 'Default', 'Index',
        'home', 'support',
    )
    # NOTE(review): the original also tested the directory path for 'About',
    # 'Community', 'support', 'home', 'Products', 'service', 'solutions',
    # 'training' and 'wheretobuy', but the branch only executed `pass`, so it
    # never affected the result.  The no-op test was dropped; confirm whether
    # those folders were actually meant to be skipped.

    path_list = []
    filename_list = []
    for root, _dirs, names in os.walk(path):
        for nm in names:
            if any(part in nm for part in skipped_name_parts):
                continue
            filename_list.append(nm)
            path_list.append(os.path.join(root, nm))

    # Every collected file is scanned for every collected file name.
    for file_path in path_list:
        for name in filename_list:
            find_filename_inside_files(name, file_path)

def find_filename_inside_files(file_name, dir_path):
    """Rewrite references to *file_name* inside the file at *dir_path*.

    The file is read as UTF-8.  If its text contains ``file_name`` and the
    part of ``file_name`` before the first ``.`` matches at least one of the
    suffix patterns below, the text returned by
    ``replace_oldstring_newstring`` is written back to the same path and the
    file name and path are printed.  Otherwise the file is left untouched.

    Args:
        file_name: File name to look for in the file's text.
        dir_path: Path of the file to scan and possibly rewrite (despite the
            parameter name, this is a file path, not a directory).
    """
    # Raw strings: '\d' and '\w' in a normal string literal are invalid
    # escape sequences.  The pattern text itself is unchanged.
    pattern_list = [
        r'\d+$', r'\d+\w$', r'\d+-\d$', r'\w\d+$',
        r'\d\w\d\w', r'\w\d+$', r'\w\d\w\d',
    ]

    with open(dir_path, 'r', encoding='utf-8') as file_handle:
        data = file_handle.read()

    if file_name not in data:
        return

    stem = file_name.split('.')[0]
    matching_patterns = [
        search_pattern for search_pattern in pattern_list
        if re.search(search_pattern, stem)
    ]
    if not matching_patterns:
        return

    # As before, the last matching pattern decides how much is stripped.
    # Previously the "matched" flag was reset for every pattern, so the file
    # was only rewritten when the final pattern in the list matched.
    deciding_pattern = matching_patterns[-1]
    if '-' in file_name:
        digits_to_replace = filename_with_hypen(file_name, deciding_pattern)
    else:
        digits_to_replace = filename_without_hypen(file_name, deciding_pattern)

    replace_str = replace_oldstring_newstring(data, file_name, digits_to_replace)

    print(file_name)
    print(dir_path)
    with open(dir_path, 'w', encoding='utf-8') as file_out:
        file_out.write(replace_str)
    # No exit() here: terminating the interpreter after the first rewritten
    # file prevented the remaining files from ever being processed.

def filename_without_hypen(file_name, pattern):
    """Return the negated length of the text *pattern* matches in the stem.

    The stem is the part of *file_name* before its first ``.``.  ``None`` is
    returned when *pattern* does not match, or when it matches an empty
    string.
    """
    stem = file_name.partition('.')[0]
    match = re.search(pattern, stem)
    if match is None:
        return None

    matched_length = len(match.group())
    # A zero-length match gives no usable slice offset.
    if matched_length == 0:
        return None
    return -matched_length

def filename_with_hypen(file_name, pattern):
    """Return the negated length of the text *pattern* matches in the stem.

    The stem is the part of *file_name* before its first ``.``.  ``None`` is
    returned when *pattern* does not match; a zero-length match yields ``0``.
    """
    stem, _, _ = file_name.partition('.')
    match = re.search(pattern, stem)
    return None if match is None else -len(match.group())

def replace_oldstring_newstring(data, filename, last_digits_to_replace):
    """Return *data* with every occurrence of *filename* shortened.

    The replacement name is built from *filename* by cutting the stem (the
    part before the first ``.``) at ``last_digits_to_replace`` and turning a
    trailing ``.html`` into ``.htm``; e.g. ``'abcd1234.html'`` with ``-4``
    becomes ``'abcd.htm'``.

    Only occurrences of the complete file name are rewritten.  Previously the
    stripped suffix (e.g. ``'1234'``) was removed everywhere in the text and
    every ``.html`` in the text was changed, which altered unrelated content.

    Args:
        data: Full text of the file being rewritten.
        filename: File name whose references should be shortened.
        last_digits_to_replace: Slice offset applied to the stem; callers in
            this file pass a negative count of trailing characters to drop.
            ``None`` or ``0`` leaves the stem unchanged.

    Returns:
        The rewritten text, or *data* unchanged when it does not contain
        *filename*.
    """
    print("in replace")

    # Nothing to rewrite; avoids the ValueError data.index() used to raise.
    if filename not in data:
        return data

    stem, dot, extension = filename.partition('.')
    if last_digits_to_replace:
        stem = stem[:last_digits_to_replace]

    new_name = stem + dot + extension
    if new_name.endswith('.html'):
        new_name = new_name[:-1]  # '.html' -> '.htm'

    return data.replace(filename, new_name)

def main():
    """Script entry point: delegate to ``script_to_create_folder``."""
    script_to_create_folder()

# Call main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()