"""Look up candidate genes inside per-core-gene connectome files.

For every core gene listed in ``core_genes.txt`` the script opens the
tab-separated connectome file ``<HGC_path><core gene>.txt`` and, for every
gene listed in ``candidate_genes.txt``, copies the first row whose first
column equals that gene into ``candidates_in_core_genes.txt``, prefixed with
the core gene name.

Only single-argument ``print(...)`` calls are used so the script behaves the
same under Python 2 and Python 3.
"""
import time
import string
import sys

start_time = time.time()

# Path to all gene specific connectomes
HGC_path = '/Volumes/SSD2/Connectome/10.0/All_connectomes_v10.0/'

# (column index, converter) for the columns that are parsed as numbers before
# being written back out. Column 0 (target gene) and column 8 (route) stay
# strings. Column names follow the output header written by the main program.
_NUMERIC_COLUMNS = (
    (1, float),  # Distance
    (2, int),    # Rank
    (3, float),  # P-value(percentile)
    (4, float),  # BRP
    (5, float),  # Median_ratio
    (6, float),  # Average_ratio
    (7, int),    # Sphere
    (9, int),    # Degrees_separation
)


def _read_gene_names(file_name):
    """Return the whitespace-stripped, non-blank lines of *file_name*."""
    with open(file_name, 'r') as handle:
        stripped = (line.strip() for line in handle)
        return [name for name in stripped if name]


def _convert_row(words):
    """Return a copy of *words* with the numeric columns parsed.

    Raises ValueError if a numeric column does not parse and IndexError if
    the row has too few columns.
    """
    fields = list(words)
    for index, convert in _NUMERIC_COLUMNS:
        fields[index] = convert(fields[index])
    return fields


def generate_gene_lists(core_file_name, list_file_name):
    """Read the core-gene list and the list of genes to rank.

    Each file is expected to hold one gene name per line. Surrounding
    whitespace is stripped and blank lines are skipped, so a trailing empty
    line does not turn into an empty gene name. Every core gene name is
    printed to stdout.

    Returns a ``(core_genes, candidate_genes)`` tuple of lists of strings.
    Raises IOError/OSError if either file cannot be opened.
    """
    core_genes = _read_gene_names(core_file_name)
    candidate_genes = _read_gene_names(list_file_name)
    for gene_name in core_genes:
        print(gene_name)
    return core_genes, candidate_genes


def extract_from_core_gene(temp_core_path, temp_gene_list, temp_core_gene,
                           out_file=None):
    """Copy the rows for the requested genes out of one core-gene connectome.

    The connectome file is read once. For each gene in *temp_gene_list*, in
    list order, the first row whose first tab-separated column equals the
    gene is written to the output as
    ``<temp_core_gene>\\t<row fields>\\n``. Numeric columns are parsed and
    re-serialised with ``str()``, so their textual form may be normalised
    (e.g. ``1.50`` becomes ``1.5``). No sorting is performed.

    Parameters:
        temp_core_path: path of the connectome file of the core gene.
        temp_gene_list: gene names to look up.
        temp_core_gene: core gene name, used as the first output column.
        out_file: writable file object; defaults to the module-level
            ``file1`` opened by the main program.

    A connectome file that cannot be opened is skipped silently, and genes
    that are absent from the file produce no output. A matching row whose
    numeric columns are malformed or missing is skipped with a warning on
    stderr. Other errors, such as a failing write, propagate to the caller.
    """
    if out_file is None:
        out_file = file1  # global handle set up by the main program

    print("Prioritizing list in core gene " + temp_core_gene + "\tseconds from start:\t" + str(time.time()-start_time))

    try:
        connectome = open(temp_core_path, 'r')
    except (IOError, OSError):
        # Connectome file doesn't exist
        return

    wanted = set(temp_gene_list)
    first_rows = {}  # gene name -> raw columns of its first matching row
    with connectome:
        for line in connectome:
            if len(first_rows) == len(wanted):
                break  # every requested gene already located
            words = line.strip().split("\t")
            name = words[0]
            if name in wanted and name not in first_rows:
                first_rows[name] = words

    for gene in temp_gene_list:
        words = first_rows.get(gene)
        if words is None:
            continue  # gene not present in this connectome
        try:
            fields = _convert_row(words)
        except (ValueError, IndexError):
            sys.stderr.write("Warning: skipping malformed row for " + gene + " in " + temp_core_path + "\n")
            continue
        to_write = '\t'.join(str(e) for e in fields)
        out_file.write(temp_core_gene + "\t" + to_write + "\n")


###########Main program#########
if __name__ == "__main__":
    # Create lists of core genes and genes to be ranked
    core_list, gene_list = generate_gene_lists("core_genes.txt", "candidate_genes.txt")

    # file1 stays a module-level name: extract_from_core_gene writes to it
    # when no explicit out_file is passed.
    with open("candidates_in_core_genes.txt", 'w') as file1:
        header = "Core\tTarget\tDistance\tRank\tP-value(percentile)\tBRP\tMedian_ratio\tAverage_ratio\tSphere\tRoute\tDegrees_separation\n"
        file1.write(header)
        for core_gene in core_list:  # Going through all core genes
            core_path = HGC_path + core_gene + ".txt"
            extract_from_core_gene(core_path, gene_list, core_gene)

    print("Run complete in:\t" + str(time.time()-start_time) + "\tseconds")