Sophie

Sophie

distrib > Mandriva > 2008.1 > x86_64 > by-pkgid > 763d6289e1351f2d34257ce697a3ccb7 > files > 2395

biopython-doc-1.47-2mdv2008.1.x86_64.rpm

#!/usr/bin/env python
"""Example showing how to deal with internet BLAST from Biopython.

This code is described in great detail in the BLAST section of the Biopython
documentation.
"""
# standard library
import cStringIO

# biopython
from Bio.Blast import NCBIWWW
from Bio import Fasta

# first get the sequence we want to parse from a FASTA file
file_for_blast = open('m_cold.fasta', 'r')
f_iterator = Fasta.Iterator(file_for_blast)

f_record = f_iterator.next()

print 'Doing the BLAST and retrieving the results...'
result_handle = NCBIWWW.qblast('blastn', 'nr', f_record)

# save the results for later, in case we want to look at it
save_file = open('m_cold_blast.out', 'w')
blast_results = result_handle.read()
save_file.write(blast_results)
save_file.close()

print 'Parsing the results and extracting info...'
b_parser = NCBIWWW.BlastParser()

# option 1 -- parse the string directly
# b_record = b_parser.parse_str(blast_results)

# option 2 -- create a handle from the string and parse it
string_result_handle = cStringIO.StringIO(blast_results)
b_record = b_parser.parse(string_result_handle)

# now get the alignment info for all e values greater than some threshold
E_VALUE_THRESH = 0.1

for alignment in b_record.alignments:
    for hsp in alignment.hsps:
        if hsp.expect < E_VALUE_THRESH:
            print '****Alignment****'
            print 'sequence:', alignment.title
            print 'length:', alignment.length
            print 'e value:', hsp.expect
            print hsp.query[0:75] + '...'
            print hsp.match[0:75] + '...'
            print hsp.sbjct[0:75] + '...'