def GetNumberOfColumnsOfTabularFile(filename=None, separator="\t"):
    """Return the number of fields on the first line of a delimited file.

    Only the first line is read. It is split on ``separator`` without
    stripping the trailing newline, so an empty file yields 1.

    Args:
        filename: Path of the file to inspect.
        separator: Field delimiter; defaults to a tab.

    Returns:
        The number of ``separator``-delimited fields on the first line.
    """
    # The context manager closes the file even if readline() raises.
    with open(filename) as tabularFile:
        return len(tabularFile.readline().split(separator))


def _SelectCells(cells, columns):
    """Return cells[column] for each column, or "" when the index is past the end."""
    return [
        "" if column >= len(cells) else cells[column]
        for column in columns
    ]


def MergeHorizontallyFilesAccordingToCommonColumns(
    listOfFilenamesToBeAnnotated=None,
    listOfColumnsFromFileToBeAnnotated=None,
    listOfListsOfInputFilenames=None,
    listOfAnnotationFileColumns=None,
    listOfFirstLinesToIgnore=None,
    listOfOutputFilenames=None
):
    """Append columns from annotation files to tab-separated base files.

    For every base file, each output row consists of the selected columns of
    the base row followed by the selected columns of the row at the same
    position in each annotation file. Rows are paired purely by line order;
    no key matching is performed. Column indices that fall past the end of a
    row (including rows read after an annotation file is exhausted) produce
    empty cells.

    Args:
        listOfFilenamesToBeAnnotated: Paths of the base files, one output
            file is produced per entry.
        listOfColumnsFromFileToBeAnnotated: Zero-based column indices to copy
            from each base row. When None, all columns are used, where the
            column count is taken from the first line of the first base file.
        listOfListsOfInputFilenames: One list per annotation source; entry
            ``[k][i]`` is the annotation file of source ``k`` used for base
            file ``i``.
        listOfAnnotationFileColumns: One list of zero-based column indices
            per annotation source.
        listOfFirstLinesToIgnore: Optional number of leading lines to skip in
            the files of each annotation source; None skips nothing.
        listOfOutputFilenames: Output path for each base file.
    """
    # If no list of columns is provided then we take all columns
    if listOfColumnsFromFileToBeAnnotated is None:
        listOfColumnsFromFileToBeAnnotated = list(range(
            GetNumberOfColumnsOfTabularFile(listOfFilenamesToBeAnnotated[0])
        ))

    for index, filenameToBeAnnotated in enumerate(listOfFilenamesToBeAnnotated):
        # "%s %s" reproduces the separator the Python 2 print statement
        # inserted between its two arguments, and works on Python 3 as well.
        print("%s %s" % ("Annotating file: ", filenameToBeAnnotated))

        # Annotation files are tracked here so that all of them are closed
        # in the finally clause, whatever happens while merging.
        listOfAnnotationFiles = []
        try:
            with open(listOfOutputFilenames[index], "w") as outputFile, \
                    open(filenameToBeAnnotated) as fileToBeAnnotated:
                for indexOfInputFilenames, listOfInputFilenames in enumerate(
                        listOfListsOfInputFilenames):
                    print("%s %s" % ("Annotating from: ", listOfInputFilenames[index]))
                    currentOpenedFile = open(listOfInputFilenames[index])
                    # Register before skipping lines so the handle is closed
                    # even if the skip count lookup fails.
                    listOfAnnotationFiles.append(currentOpenedFile)

                    # Skip first lines
                    if listOfFirstLinesToIgnore is not None:
                        for _ in range(listOfFirstLinesToIgnore[indexOfInputFilenames]):
                            currentOpenedFile.readline()

                for lineOfFileToBeAnnotated in fileToBeAnnotated:
                    toPrint = _SelectCells(
                        lineOfFileToBeAnnotated.replace("\n", "").split("\t"),
                        listOfColumnsFromFileToBeAnnotated,
                    )

                    for annotationIndex, annotationFile in enumerate(listOfAnnotationFiles):
                        # readline() returns "" once the annotation file is
                        # exhausted, which yields empty cells for that row.
                        toPrint += _SelectCells(
                            annotationFile.readline().replace("\n", "").split("\t"),
                            listOfAnnotationFileColumns[annotationIndex],
                        )

                    outputFile.write("\t".join(toPrint) + "\n")
        finally:
            for annotationFile in listOfAnnotationFiles:
                annotationFile.close()