import re
import os
try:
    import urlparse
except ImportError:  # the module lives at urllib.parse on Python 3
    import urllib.parse as urlparse
import urllib
import os.path
from os.path import join, getsize
import sys
#import string
try:
    from fractions import gcd
except ImportError:  # fractions.gcd was removed in Python 3.9
    from math import gcd
import glob
from select import select
import difflib
import subprocess
import shlex

# Some imports are fluff to have them present for allowing user interaction
# with file replacement.  However, it seems that difflib works well enough
# that I choose just to accept what is found, though there is a commented
# hook for doing some kind of "getchar()".
#
# The program exists because I like to keep current projects on the desktop
# and then bump them out of the way when something new takes higher priority.

# Directory tree searched for relocated files.
where_to_search = '/home/mo/Desktop'

# Running totals reported by show_statistics().
edit_files_checked = 0
sessions_checked = 0
missing_files = 0

# Marker that introduces a file reference inside a session line.
splitcode = r'file://'
pattern = re.compile(splitcode)


def find_files(file_name_to_find):
    """Yield every path under ``where_to_search`` matching the given name.

    ``file_name_to_find`` is handed to ``glob`` in each directory visited by
    ``os.walk``, so glob wildcards in it are honoured.
    """
    for root, _, _ in os.walk(where_to_search):
        # ``root`` already starts with ``where_to_search``; joining the search
        # root in front of it again would double the prefix whenever the
        # search root is a relative path.
        for found_file_name in glob.iglob(os.path.join(root, file_name_to_find)):
            yield found_file_name


def url_split_strip(queryString):
    """Return the path component of ``queryString`` without trailing whitespace."""
    parse_result = urlparse.urlsplit(queryString)
    return parse_result.path.rstrip()


def replace(f, startof, newname, splitcode, endof):
    """Write one rebuilt line to ``f``.

    The line is ``startof + splitcode + newname + endof`` followed by a
    newline; ``f`` only needs a ``write`` method.
    """
    f.write(startof + splitcode + newname + endof + '\n')


def walk_about_directory():
    """Print the byte total and file count of each directory below ``'.'``.

    Directories named ``CVS`` are pruned from the walk.
    """
    for root, dirs, files in os.walk('.'):
        total_bytes = sum(getsize(join(root, name)) for name in files)
        print("%s consumes %s bytes in %s non-directory files"
              % (root, total_bytes, len(files)))
        if 'CVS' in dirs:
            dirs.remove('CVS')  # don't visit CVS directories


def show_statistics():
    """Print the session, file and missing-file counters plus their ratio.

    When no session was checked there is nothing to average, and both the
    average and the GCD-reduced ratio would divide by zero, so only the
    counters and a short note are printed in that case.
    """
    # Each message is a single pre-formatted string so the console layout
    # (including the doubled spaces) matches what the comma-separated print
    # statements used to emit.
    print('%s  sessions checked  %s  session files checked  %s  missing'
          % (sessions_checked, edit_files_checked, missing_files))
    if not sessions_checked:
        print('No sessions checked, so there is no average to report')
        return
    average = edit_files_checked / float(sessions_checked)
    # I admit this is pure fluff from any perspective, but I like GCD
    d = gcd(sessions_checked, edit_files_checked)
    # d divides both counters exactly, so floor division loses nothing.
    print('%2.2f   average files per session  and GCD is %s'
          '  and resulting relationship is  %s / %s'
          % (average, d, sessions_checked // d, edit_files_checked // d))


# Where it starts to do some work.
# Scan every Kate session in the current directory.  Each line that contains
# the file marker is checked against the filesystem; when the referenced file
# is gone, the closest same-named file found by find_files() is substituted.
# The edited copy is built in the scratch file 'tmp' and only replaces the
# session when at least one line was changed.
for session_file in os.listdir("."):
    if not session_file.endswith(".katesession"):
        continue
    sessions_checked = sessions_checked + 1
    replace_entire_session_flag = False
    tmp_installed = False
    try:
        with open(session_file) as session:
            with open('tmp', 'w') as f:
                for i, line in enumerate(session):
                    # One decision per line: the text between the first and
                    # second marker is the only file reference examined, so
                    # a line is counted and written exactly once.
                    match = pattern.search(line)
                    if match is None:
                        f.write(line)
                        continue

                    pieces = line.split(splitcode)
                    line_start = pieces[0]
                    file_name_plus = pieces[1]
                    line_end_char = ''
                    # endswith() is safe when nothing follows the marker.
                    if file_name_plus.endswith('\n'):
                        file_name_plus = file_name_plus[:-1]
                    if file_name_plus.endswith(']'):
                        file_name_plus = file_name_plus[:-1]
                        line_end_char = ']'
                    file_name_only_stripped = url_split_strip(file_name_plus)
                    edit_files_checked = edit_files_checked + 1

                    if os.path.exists(file_name_only_stripped):
                        f.write(line)
                        continue

                    missing_files = missing_files + 1
                    print('Missing  %s  in  %s  at line %s: with groups %s '
                          % (file_name_only_stripped, session_file,
                             i + 1, match.groups()))
                    drive, path = os.path.splitdrive(file_name_only_stripped)
                    path, filename = os.path.split(path)
                    list_of_possible_files = list(find_files(filename))
                    closest_match_files = difflib.get_close_matches(
                        file_name_only_stripped, list_of_possible_files, 1)

                    if closest_match_files:
                        # Keeps the console layout of the old print
                        # statement, which put no space after a line that
                        # ends in a newline.
                        gap = '' if line.endswith('\n') else ' '
                        print('REPLACE  %s%s WITH  %s'
                              % (line, gap,
                                 line_start + splitcode
                                 + closest_match_files[0] + line_end_char))
                        user_input_char = "y"  # getchar()
                    else:
                        user_input_char = "n"

                    if user_input_char == "q":
                        sys.exit(0)
                    if user_input_char == "y":
                        replace(f, line_start, closest_match_files[0],
                                splitcode, line_end_char)
                        replace_entire_session_flag = True
                    else:
                        print("NOT replacing line (No near match found)")
                        print(line)
                        f.write(line)
        if replace_entire_session_flag:
            os.remove(session_file)
            os.rename('tmp', session_file)
            tmp_installed = True
    finally:
        # Never leave the scratch file behind: not for an unchanged session,
        # and not when an error or exit interrupts the rewrite.
        if not tmp_installed and os.path.exists('tmp'):
            os.remove('tmp')

show_statistics()
# 0 comments:
# Post a Comment