# MARC Program
# Assisting Bioinformatics Programs 
#
# Written by: Bienvenido Velez

from Bio import SeqIO
from Bio import Fasta
from Bio.Blast import NCBIXML
from Bio.Blast import NCBIWWW
import MySQLdb

# In order to run this code you must install the BioPython and MySQLdb modules
# You can get MySQLdb from http://sourceforge.net/projects/mysql-python
# You can get BioPython from ww.bioPython.org


# In the follwing code you will find some useful functions to simplify
# the loading of Fasta records, running Blasts and loading blast records for
# Processing.  You can run this file and it will conduct a Blast
# search on the first sequence stored in a Fasta File

def loadFastaSequences(fastaFilename):
    'Loads (parses) all sequences in a Fasta file and returns a list of Fasta Record objects'
    input_file = open(fastaFilename)
    records = list(SeqIO.parse(input_file, "fasta"))
    input_file.close()
    frecords = list()
    for fastaSeq in records:
        nextRecord = Fasta.Record()
        nextRecord.title = fastaSeq.description
        nextRecord.sequence = fastaSeq.seq.data
        frecords = frecords + [nextRecord]
    return frecords

# This is how you can call the loadFastaSequences function (see full script below)
# fastaRecords = loadFastaSequences("aldehydeDehydrogenase.fasta")

# runBlastn - Conducts a nucleotide Blast run on the gives fastaSequence
# and save the blast run output in XML format into a file named 'outputFileName'
def runBlastn(fastaSequence,outputFileName):
    'Conducts a nucleotide Blast run on the given fastaSequence and save the blast run output in XML format'
    result_handle = NCBIWWW.qblast("blastn", "nr",fastaSequence)
    # save the blast results for later, in case we want to look at them
    save_file = open(outputFileName, "w")
    blast_results = result_handle.read()
    save_file.write(blast_results)
    save_file.close()
    return blast_results

# This is how you can use the 'runBlastn' function (See full script below)
# blast_results=runBlastn(fastaRecords[0],"myblast.out")

def loadBlastRecord(blastFilename):
    'Reads (parses) the output of a Blast run stored in a file and returns the correponding Blast record'
    blast_out = open(blastFilename, "r")
    b_parser = NCBIXML.BlastParser()
    b_record = b_parser.parse(blast_out)
    return b_record

# This is how you can use the loadBlastRecord function (see full script below)
# blastRecord = loadBlastRecord("myblast.out")

# Print some information form the Blast results
# For a complete description of the Blast Record objet see page 22
# the BioPython tutorial (www.bioPython.org)
def printResults(blastRecord):
    'Print the (partial) content of the Blast results record'
    E_VALUE_THRESH = 0.04
    for alignment in blastRecord[0].alignments[0:2]:
        for hsp in alignment.hsps:
            if hsp.expect < E_VALUE_THRESH:
                print "****Alignment****"
                print "accession: ", blastRecord[0].descriptions[0].accession
                print "accession: ", blastRecord[0].descriptions[1].accession
                print "sequence:", alignment.title
                print "length:", alignment.length
                print "e value:", hsp.expect
                print hsp.query[0:75] + "..."
                print "Query start:", hsp.query_start
                print hsp.match[0:75] + "..."
                print hsp.sbjct[0:75] + "..."
            
                print "Subject start:", hsp.sbjct_start
                print blastRecord[0].descriptions[0]


# You must install MySQL in order to use the following function
# The following function is harder to generalize as the
# relational database schema is higly dependent on what you want to
# perform with the data.  You can use this simple example as guide
# for creating your own schemas.

def migrateBlastToExcel(blastRecord, excelFilename):
    # Open Excel file for output
    saveFile = open(excelFilename, "w")
    # Insert the tuples into the relational table
    E_VALUE_THRESH = 0.04
    for alignment in blastRecord[0].alignments:
        for hsp in alignment.hsps:
            if hsp.expect < E_VALUE_THRESH:
                # Print a line with fields delimitted by comma
                title = alignment.title[0:20].replace(',',' ')
                line = '%s,%d,%f\n' % (title, alignment.length, hsp.expect)
                saveFile.write(line)
    saveFile.close()

def doIt():
    fastaRecords = loadFastaSequences("aldehydeDehydrogenase.fasta")
    blast_results=runBlastn(fastaRecords[0],"myblast.out")
    blastRecord = loadBlastRecord("myblast.out")
    printResults(blastRecord)
    #migrateBlastToMySQL(blastRecord)
    migrateBlastToExcel(blastRecord, "myblast.csv")
