| | 164 | |
| | 165 | = AnnotateListOfChromosomePositionFilesWithGOFromBioMartEnsembl = |
| | 166 | Create Gene Ontology (http://www.geneontology.org/) annotation for a list of files that contain at least a position column and a chromosome column. |
| | 167 | |
| | 168 | == Parameters == |
| | 169 | * listOfFilesToAnnotate: Python list of filenames to be annotated |
| | 170 | * numberOfFirstLinesToIgnoreInFileToAnnotate: The number of lines at the start of each file to be annotated that are skipped (e.g. header lines) |
| | 171 | * chromosomeColumnOfFilesToAnnotate: The # of the chromosome column in the file to be annotated (starting from 0) |
| | 172 | * positionColumnOfFilesToAnnotate: The # of the position column in the file to be annotated (starting from 0) |
| | 173 | * resolveDuplicateValuesFunctionInFileToBeAnnotated: Determines what happens when two lines in the file to be annotated have the same position and chromosome. If it is not set to None, the function assigned to this parameter is called |
| | 174 | * fileWithGOAnnotation: The file that has been downloaded from BioMart and contains the GO annotation. |
| | 175 | * fileWithGOAnnotationChromosomeColumn: The column that contains the chromosome in the fileWithGOAnnotation |
| | 176 | * fileWithGOAnnotationStartColumn: The column that contains the start of the transcript in the fileWithGOAnnotation |
| | 177 | * fileWithGOAnnotationEndColumn: The column that contains the end of the transcript in the fileWithGOAnnotation |
| | 178 | * columnsWithGOAnnotationComaSeparated: The columns of the fileWithGOAnnotation that contain the annotations we want to add to the output, given as a comma-separated string. Example: "2,3,4" |
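| | 179 | * numberOfFirstLinesToIgnoreInGOAnnotationFile: The number of lines at the start of the fileWithGOAnnotation that are skipped (e.g. header lines) |
| | 180 | * outputDirectory: The directory in which the annotated output files are written |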
| | 181 | * outputSuffix: The output file will be: outputDirectory/(basename of inputFile)+outputSuffix |
| | 182 | |
| | 183 | == Example == |
| | 184 | {{{ |
| | 185 | #!div style="font-size: 80%" |
| | 186 | Code highlighting: |
| | 187 | {{{#!python |
| | 188 | fileList= [ |
| | 189 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_1.txt", |
| | 190 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_2.txt", |
| | 191 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_3.txt", |
| | 192 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_4.txt", |
| | 193 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_5.txt", |
| | 194 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_6.txt", |
| | 195 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_7.txt", |
| | 196 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_8.txt", |
| | 197 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_9.txt", |
| | 198 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_10.txt", |
| | 199 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_11.txt", |
| | 200 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_12.txt", |
| | 201 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_13.txt", |
| | 202 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_14.txt", |
| | 203 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_15.txt", |
| | 204 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_16.txt", |
| | 205 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_17.txt", |
| | 206 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_18.txt", |
| | 207 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_19.txt", |
| | 208 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_20.txt", |
| | 209 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_21.txt", |
| | 210 | "/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02/tab_22.txt" |
| | 211 | ] |
| | 212 | |
| | 213 | AnnotateListOfChromosomePositionFilesWithGOFromBioMartEnsembl( |
| | 214 | listOfFilesToAnnotate=fileList, |
| | 215 | numberOfFirstLinesToIgnoreInFileToAnnotate=1, |
| | 216 | chromosomeColumnOfFilesToAnnotate=2, |
| | 217 | positionColumnOfFilesToAnnotate=3, |
| | 218 | fileWithGOAnnotation="/Users/alexandroskanterakis/Data/Ensembl/GENE_START_END_GO_FROM_ENSEMBL_36.txt", |
| | 219 | fileWithGOAnnotationChromosomeColumn=1, |
| | 220 | fileWithGOAnnotationStartColumn=2, |
| | 221 | fileWithGOAnnotationEndColumn=3, |
| | 222 | columnsWithGOAnnotationComaSeparated="4,5,6,7,8,9", |
| | 223 | numberOfFirstLinesToIgnoreInGOAnnotationFile=1, |
| | 224 | outputDirectory="/Users/alexandroskanterakis/Data/CD_china/genomeWideExcluding/genomeWideExcluding360-02", |
| | 225 | outputSuffix="_GO.txt" |
| | 226 | ) |
| | 227 | }}} |
| | 228 | }}} |
| | 229 | |
| | 230 | == Source Code == |
| | 231 | http://www.bbmriwiki.nl/svn/SequenceAnnotation/Scripts/AnnotateListOfChromosomePositionFilesWithGOFromBioMartEnsembl.py |