# Attachment 'GRID.py'

# Copyright 2007 by Jake Feala.  All rights reserved.

"""
This module provides code to work with the
BIOGRID-ORGANISM-<organism>-X.X.XX.tab.txt
flat file from BioGRID.
http://www.thebiogrid.org/downloads.php

Classes:
GRIDIterator            Iterates over entries in a GRID file.
GRIDParser              Parses a GRID record into an InteractionRecord object.

_GRIDScanner            Scans BioGRID-formatted data from a flat-file
_GRIDRecordConsumer     Consumes BioGRID data to an InteractionRecord object.

"""

import re
from Bio import File
from Bio.ParserSupport import *
from InteractionRecord import InteractionRecord

class GRIDIterator:
    """Returns one record at a time from a GRID file.

    The parser's parse_header method is called on the handle when the
    iterator is constructed; each call to next() then asks the parser for
    one more record.

    Methods:
    next   Return the next record from the stream; raises StopIteration
           when the parser reports a SyntaxError.

    """
    def __init__(self, handle, parser=None):
        """__init__(self, handle, parser=None)

        Create a new iterator.  handle is a file-like object (anything
        providing a readline method).  parser is an optional Parser object
        providing parse_header(handle) and parse(handle).  If set to None,
        a GRIDParser is used.

        Raises ValueError if handle does not look like a file.

        """
        # Duck-type the handle instead of comparing against FileType /
        # InstanceType, which rejects file-like new-style class instances.
        if not hasattr(handle, 'readline'):
            raise ValueError("I expected a file handle or file-like object")
        self._uhandle = File.UndoHandle(handle)
        if parser is None:
            parser = GRIDParser()
        self._parser = parser
        # Called for its side effect on the handle only; __init__ must not
        # return a value.
        self._parser.parse_header(self._uhandle)

    def next(self):
        """next(self) -> object

        Return the next record from the file.  A SyntaxError raised by the
        parser is translated into StopIteration, which ends iteration.

        """
        # NOTE(review): assumes Bio.ParserSupport signals "no more lines"
        # with SyntaxError -- confirm against the installed Biopython.
        try:
            return self._parser.parse(self._uhandle)
        except SyntaxError:
            raise StopIteration

    def __iter__(self):
        # Two-argument iter(): call next() repeatedly, stopping if it
        # returns None or raises StopIteration.
        return iter(self.next, None)


class GRIDParser(AbstractParser):
    """Turns BioGRID flat-file lines into InteractionRecord objects.

    Pairs a _GRIDScanner with a _GRIDRecordConsumer: the scanner reads
    from the handle and drives the consumer, and the consumer holds the
    record that was built.

    """
    def __init__(self):
        self._consumer = _GRIDRecordConsumer()
        self._scanner = _GRIDScanner()

    def parse_header(self, handle):
        """Let the scanner consume the header of handle.  Returns None."""
        scanner, consumer = self._scanner, self._consumer
        scanner.header(handle, consumer)

    def parse(self, handle):
        """Scan one record from handle and return the consumer's data."""
        consumer = self._consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
    

class _GRIDScanner:
    """Reads BioGRID-formatted lines and reports them to a consumer.
    """

    def _undoable(self, handle):
        """Return handle itself if it is a File.UndoHandle, else wrap it."""
        if not isinstance(handle, File.UndoHandle):
            handle = File.UndoHandle(handle)
        return handle

    def header(self, handle, consumer):
        """header(self, handle, consumer)

        Scan the leading part of a BioGRID file.  Lines are passed to
        consumer.header until read_and_call_until sees a line matching ten
        tab-terminated columns; one further line is then passed to
        consumer.field_names.

        """
        stream = self._undoable(handle)

        # free-text header: lines without ten tab-terminated columns
        ten_columns = re.compile("(.*\t){10}")
        read_and_call_until(stream, consumer.header, has_re=ten_columns)

        # the line of field headings
        read_and_call(stream, consumer.field_names)

    def feed(self, handle, consumer):
        """feed(self, handle, consumer)

        Scan a single BioGRID interaction line.  handle is a file-like
        object positioned at a record; consumer receives start_record,
        interaction (with the line) and end_record, in that order.

        """
        stream = self._undoable(handle)

        consumer.start_record()
        # exactly one line per GRID interaction record
        read_and_call(stream, consumer.interaction)
        consumer.end_record()

class _GRIDRecordConsumer(AbstractConsumer):
    """Consumer that converts a BioGRID entry to an InteractionRecord object.

    Members:
    data            InteractionRecord with BioGRID data (None until
                    start_record has been called).
    field_mapping   Set by start_record; for each column position, the
                    index into _field_fns of the handler for that column.

    """
    def __init__(self):
        self.data = None
        # Raw strings so that \d reaches the regex engine unambiguously.
        self.cgexp = re.compile(r'CG\d+')
        self.fbexp = re.compile(r'FBgn\d+')

    def start_record(self):
        """Begin a fresh InteractionRecord tagged with db 'GRID'."""
        self.data = InteractionRecord()
        # Identity mapping: column i is handled by _field_fns[i].
        self.field_mapping = range(11)
        self.data.db = 'GRID'

    def end_record(self):
        """Derive the record id from its source, type and target."""
        # NOTE(review): the three values are concatenated without a
        # separator, so distinct (source, type, target) triples can produce
        # the same string.  Left unchanged because existing ids may be
        # relied upon elsewhere.
        self.data.id = hash(self.data.source+self.data.type+self.data.target)

    def header(self, line):
        """In the future you could initialize the consumer using info in header"""
        #self.field_mapping = []
        pass

    def field_names(self, line):
        """Field names could be saved"""
        pass

    def interaction(self, line):
        """Splits interaction record into fields and maps them to consumer fns

        line is one tab-delimited record.  Its line terminator is removed
        before splitting so that it does not become part of the last field.
        Raises IndexError if the line has more columns than field_mapping.

        """
        fields = line.rstrip('\r\n').split('\t')
        for i, field in enumerate(fields):
            # _field_fns holds plain functions, so self is passed explicitly.
            fn = self._field_fns[self.field_mapping[i]]
            fn(self, field)

    def source(self, field):
        """Store the normalised gene name of the source interactor."""
        gene = self._drosophila_gene(field)
        self.data.source = gene

    def target(self, field):
        """Store the normalised gene name of the target interactor."""
        gene = self._drosophila_gene(field)
        self.data.target = gene

    # The alias handlers below append to lists on self.data; this assumes
    # InteractionRecord creates source_aliases / target_aliases as lists --
    # TODO confirm.

    def source_symbol(self, field):
        """Add the source symbol to the source aliases."""
        gene = self._drosophila_gene(field)
        self.data.source_aliases.append(gene)

    def target_symbol(self, field):
        """Add the target symbol to the target aliases."""
        gene = self._drosophila_gene(field)
        self.data.target_aliases.append(gene)

    def source_aliases(self, field):
        """Add each '|'-separated alias, except 'N/A', to the source aliases."""
        for alias in field.split('|'):
            if alias != 'N/A':
                gene = self._drosophila_gene(alias)
                self.data.source_aliases.append(gene)

    def target_aliases(self, field):
        """Add each '|'-separated alias, except 'N/A', to the target aliases."""
        for alias in field.split('|'):
            if alias != 'N/A':
                gene = self._drosophila_gene(alias)
                self.data.target_aliases.append(gene)

    def experimental_system(self, field):
        """Store the experimental system as the interaction type."""
        self.data.type = field

    def study(self, field):
        """The study column is ignored."""
        pass

    def PMID(self, field):
        """Store the PMID column unchanged (as a string)."""
        self.data.PMID = field

    def source_taxonomy_id(self, field):
        """Store the source taxonomy id unchanged (as a string)."""
        self.data.source_taxon_id = field

    def target_taxonomy_id(self, field):
        """Store the target taxonomy id unchanged (as a string)."""
        self.data.target_taxon_id = field

    def _drosophila_gene(self, gene):
        """Return the first CG<digits> match in gene, else the first
        FBgn<digits> match, else gene unchanged."""
        match = self.cgexp.search(gene)
        if match:
            return match.group()
        match = self.fbexp.search(gene)
        if match:
            return match.group()
        return gene

    # Handlers in the order field_mapping indexes them; with the identity
    # mapping set in start_record this is also the column order of a line.
    _field_fns = [source,
                  target,
                  source_symbol,
                  target_symbol,
                  source_aliases,
                  target_aliases,
                  experimental_system,
                  study,
                  PMID,
                  source_taxonomy_id,
                  target_taxonomy_id,
                  ]

# You are not allowed to view this page.