Attachment 'GRID.py'
# Copyright 2007 by Jake Feala. All rights reserved.
"""
This module provides code to work with the
BIOGRID-ORGANISM-<organism>-X.X.XX.tab.txt
flat file from BioGRID.
http://www.thebiogrid.org/downloads.php
Classes:
GRIDIterator         Iterates over entries in a GRID file.
GRIDParser           Parses a GRID record into an InteractionRecord object.
_GRIDScanner         Scans BioGRID-formatted data from a flat file.
_GRIDRecordConsumer  Consumes BioGRID data into an InteractionRecord object.
"""
import re
from Bio import File
from Bio.ParserSupport import *
from InteractionRecord import InteractionRecord
class GRIDIterator:
    """Returns one record at a time from a GRID file.

    The file preamble and the column-heading line are consumed while the
    iterator is being constructed, so every later call to next() deals
    with an interaction line.

    Methods:
    next   Return the next record from the stream; raises StopIteration
           when the parser cannot read another record.

    """

    def __init__(self, handle, parser=None):
        """__init__(self, handle, parser=None)

        Create a new iterator.  handle is a file-like object.  parser
        is an optional Parser object to change the results into another
        form; it must provide parse_header(handle) and parse(handle).
        If set to None, a GRIDParser is used.

        Raises ValueError if handle has no readline method.

        """
        # Duck-type the handle.  Comparing type(handle) against FileType and
        # InstanceType rejected every new-style file-like object.
        if not hasattr(handle, "readline"):
            raise ValueError("I expected a file handle or file-like object")
        self._uhandle = File.UndoHandle(handle)
        if parser is None:
            parser = GRIDParser()
        self._parser = parser
        # Skip the header now.  The result is deliberately not returned:
        # __init__ must return None, and a custom parser whose parse_header
        # returned a value would otherwise make construction fail.
        self._parser.parse_header(self._uhandle)

    def next(self):
        """next(self) -> object

        Return the next record from the file.  When the parser raises
        SyntaxError (which is how it reports that no further record
        could be read), StopIteration is raised instead.

        """
        if self._parser is None:
            # Only reachable if a caller cleared _parser after construction;
            # kept so that case still yields None as it always did.
            return None
        try:
            return self._parser.parse(self._uhandle)
        except SyntaxError:
            raise StopIteration

    def __iter__(self):
        # Callable-iterator form: stops on a None result as well as on the
        # StopIteration raised by next().
        return iter(self.next, None)
class GRIDParser(AbstractParser):
    """Turns BioGRID flat-file lines into InteractionRecord objects.

    Pairs a _GRIDScanner, which reads lines from the handle, with a
    _GRIDRecordConsumer, which builds the record from each line.
    """

    def __init__(self):
        self._consumer = _GRIDRecordConsumer()
        self._scanner = _GRIDScanner()

    def parse_header(self, handle):
        """Let the scanner consume the header lines of handle; returns None."""
        scanner, consumer = self._scanner, self._consumer
        scanner.header(handle, consumer)

    def parse(self, handle):
        """Scan one record from handle and return the consumer's data."""
        consumer = self._consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
class _GRIDScanner:
    """Drives a consumer over the lines of a BioGRID-formatted file."""

    def header(self, handle, consumer):
        """header(self, handle, consumer)

        Pass the leading lines of the file to consumer.header, then the
        single line of column headings to consumer.field_names.  handle
        is wrapped in a File.UndoHandle unless it already is one.
        """
        if not isinstance(handle, File.UndoHandle):
            handle = File.UndoHandle(handle)
        # Preamble lines are the ones without tab-delimited columns; the
        # first line carrying ten tab-terminated fields ends the preamble.
        ten_columns = re.compile("(.*\t){10}")
        read_and_call_until(handle, consumer.header, has_re=ten_columns)
        # That first tabbed line holds the field headings.
        read_and_call(handle, consumer.field_names)

    def feed(self, handle, consumer):
        """feed(self, handle, consumer)

        Scan a single BioGRID interaction line.  handle is a file-like
        object positioned at GRID data.  consumer is a Consumer object;
        it receives start_record, interaction (with the line) and
        end_record, in that order.
        """
        if not isinstance(handle, File.UndoHandle):
            handle = File.UndoHandle(handle)
        consumer.start_record()
        read_and_call(handle, consumer.interaction)
        consumer.end_record()
class _GRIDRecordConsumer(AbstractConsumer):
    """Consumer that converts a BioGRID entry to an InteractionRecord object.

    Members:
    data            InteractionRecord with BioGRID data (None until the
                    first start_record call).
    field_mapping   For each column position, the index into _field_fns
                    of the handler that consumes that column.
    """

    def __init__(self):
        self.data = None
        # Raw strings keep \d as a regex escape instead of relying on the
        # string literal passing an unknown escape through unchanged.
        self.cgexp = re.compile(r'CG\d+')
        self.fbexp = re.compile(r'FBgn\d+')

    def start_record(self):
        """Begin a fresh InteractionRecord tagged with db 'GRID'."""
        self.data = InteractionRecord()
        # Identity mapping: column i is handled by _field_fns[i].
        self.field_mapping = range(len(self._field_fns))
        self.data.db = 'GRID'

    def end_record(self):
        """Derive the record id from its source, type and target."""
        self.data.id = hash(self.data.source + self.data.type + self.data.target)

    def header(self, line):
        """In the future you could initialize the consumer using info in header"""
        pass

    def field_names(self, line):
        """Field names could be saved"""
        pass

    def interaction(self, line):
        """Splits interaction record into fields and maps them to consumer fns"""
        # The scanner hands over the line exactly as read, terminator
        # included; strip it so the last column is not stored with a
        # trailing newline.
        fields = line.rstrip('\r\n').split('\t')
        for position, field in enumerate(fields):
            handler = self._field_fns[self.field_mapping[position]]
            # _field_fns holds plain functions captured in the class body,
            # so the instance has to be passed explicitly.
            handler(self, field)

    def source(self, field):
        """Store the normalised gene identifier of the first interactor."""
        self.data.source = self._drosophila_gene(field)

    def target(self, field):
        """Store the normalised gene identifier of the second interactor."""
        self.data.target = self._drosophila_gene(field)

    def source_symbol(self, field):
        """Record the source's symbol as one of its aliases."""
        # NOTE(review): assumes InteractionRecord creates source_aliases as
        # a list - confirm against InteractionRecord.
        self.data.source_aliases.append(self._drosophila_gene(field))

    def target_symbol(self, field):
        """Record the target's symbol as one of its aliases."""
        self.data.target_aliases.append(self._drosophila_gene(field))

    def source_aliases(self, field):
        """Add every '|'-separated source alias, skipping 'N/A' entries."""
        for alias in field.split('|'):
            if alias != 'N/A':
                self.data.source_aliases.append(self._drosophila_gene(alias))

    def target_aliases(self, field):
        """Add every '|'-separated target alias, skipping 'N/A' entries."""
        for alias in field.split('|'):
            if alias != 'N/A':
                self.data.target_aliases.append(self._drosophila_gene(alias))

    def experimental_system(self, field):
        """Use the experimental-system column as the interaction type."""
        self.data.type = field

    def study(self, field):
        """The study column is ignored."""
        pass

    def PMID(self, field):
        """Store the PMID column as given."""
        self.data.PMID = field

    def source_taxonomy_id(self, field):
        """Store the taxonomy id of the source as given."""
        self.data.source_taxon_id = field

    def target_taxonomy_id(self, field):
        """Store the taxonomy id of the target as given."""
        self.data.target_taxon_id = field

    def _drosophila_gene(self, gene):
        """Reduce gene to its CG or FBgn identifier when it contains one.

        A CG identifier is preferred over an FBgn identifier; text with
        neither is returned unchanged.
        """
        for pattern in (self.cgexp, self.fbexp):
            match = pattern.search(gene)
            if match:
                return match.group()
        return gene

    # Column handlers in file order.  This table must stay below the method
    # definitions because it captures the function objects at class-creation
    # time.
    _field_fns = [source,
                  target,
                  source_symbol,
                  target_symbol,
                  source_aliases,
                  target_aliases,
                  experimental_system,
                  study,
                  PMID,
                  source_taxonomy_id,
                  target_taxonomy_id,
                  ]
You are not allowed to view this page.
