# MARC Workshop at PSC

# Essential Computing for Bioinformatics
#
# Examples by: Stuart Pomerance
# Pittsburgh Supercomputing Center


# ex. 0 
#
# open a file for reading
# not too useful, since not doing anything with the file
# closing the file
#

def example0():
    """Open "test.txt" for reading and close it again, untouched.

    Nothing is read from the file; the function only demonstrates the
    open/close pair. Opening fails with the usual I/O error when
    "test.txt" does not exist in the current directory.
    """
    # Leaving the 'with' block closes the file.
    with open("test.txt","r"):
        pass

# ex. 1
#
# open a file for reading
# read a single line of the file and store it in the variable 'line'
#

def example1(pathname):
    """Print the first line of the file at 'pathname'.

    The line is printed exactly as read, so its own trailing newline is
    followed by the newline that print adds. An empty file yields an
    empty string, which prints as a blank line.
    """
    # 'with' closes the file even when reading raises.
    with open(pathname, "r") as infile:
        line = infile.readline()
    # A single-argument print(...) gives the same output on Python 2 and 3.
    print(line)

# ex. 2
#
# open a file for reading
# read all the lines of the file and store them in the variable 'line'
#

def example2(pathname):
    """Print the entire contents of the file at 'pathname'.

    The whole file is read into one string and printed as is; print
    then adds one more newline after it.
    """
    # 'with' closes the file even when reading raises.
    with open(pathname, "r") as infile:
        contents = infile.read()
    # A single-argument print(...) gives the same output on Python 2 and 3.
    print(contents)

# ex. 3
#
# read all the lines of the file and store them in a list
# print the 3rd line
#

def example3(pathname):
    """Read every line of the file at 'pathname' into a list and print the 3rd.

    Raises IndexError when the file has fewer than three lines. The file
    is closed before that can happen, so no handle is left open.
    """
    with open(pathname, "r") as infile:
        lines = infile.readlines()
    # Index 2 is the third line; it still carries its own newline.
    print(lines[2])

# ex. 4
#
# read 1 line at a time
# print the line number and the line
#

def example4(pathname):
    """Print every line of the file at 'pathname' prefixed by its index.

    Output has the form "<index> : <line>", with the index starting at 0.
    Lines are printed as read, so each keeps its own trailing newline in
    addition to the one print adds.
    """
    with open(pathname, "r") as infile:
        # enumerate supplies the running count that was kept by hand before.
        for count, line in enumerate(infile):
            # One pre-formatted argument keeps the output identical on
            # Python 2 and Python 3.
            print("%d : %s" % (count, line))

# ex. 5
#
# read 1 line at a time
# print only the fasta sequence header
#

def example5(pathname):
    """Print only the fasta header lines (those starting with '>').

    Each header is printed as "<n> : <line>", where n counts headers
    starting at 0 (not file lines). Lines are printed as read, including
    their trailing newline.
    """
    count = 0
    with open(pathname, "r") as infile:
        for line in infile:
            if line.startswith('>'):
                # One pre-formatted argument keeps the output identical on
                # Python 2 and Python 3.
                print("%d : %s" % (count, line))
                count += 1

# ex. 6
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
#

def example6(pathname):
    """Label every line of a fasta file as "header" or "data" and print it.

    A line starting with '>' is a header; every other line, blank ones
    included, is reported as data. Lines are printed as read, including
    their trailing newline.
    """
    with open(pathname, "r") as infile:
        for line in infile:
            label = "header" if line.startswith('>') else "data"
            # One pre-formatted argument keeps the output identical on
            # Python 2 and Python 3.
            print("%s %s" % (label, line))

# ex. 7
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
# account for blank lines and trailing space/newlines
#

def example7(pathname):
    """Label fasta lines as "header" or "data", ignoring blank lines.

    Trailing newlines and spaces are stripped from each line first. A
    line that is empty after stripping is skipped; the rest are printed
    as "header <line>" or "data <line>".
    """
    with open(pathname, "r") as infile:
        for line in infile:
            # remove the trailing '\n' and trailing spaces
            line = line.rstrip('\n ')

            # nothing is left of a blank line, so move on to the next one
            if not line:
                continue

            label = "header" if line.startswith('>') else "data"
            # One pre-formatted argument keeps the output identical on
            # Python 2 and Python 3.
            print("%s %s" % (label, line))


# ex. 8
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
# account for blank lines and trailing space/newlines
# collect the sequence data
#

def example8(pathname):
    """Label fasta lines and print all the sequence data joined together.

    Trailing newlines and spaces are stripped and blank lines skipped.
    Each remaining line is printed as "header <line>" or "data <line>".
    All data lines in the file, whichever header they follow, are
    concatenated into one string that is printed last.
    """
    chunks = []

    with open(pathname, "r") as infile:
        for line in infile:
            # remove the trailing '\n' and trailing spaces
            line = line.rstrip('\n ')

            # nothing is left of a blank line, so move on to the next one
            if not line:
                continue

            if line.startswith('>'):
                print("header %s" % line)
            else:
                print("data %s" % line)
                chunks.append(line)

    # Joining once avoids rebuilding an ever longer string on every line.
    sequence_data = ''.join(chunks)
    print(sequence_data)


# ex. 9
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
# account for blank lines and trailing space/newlines
# collect the sequence data for multiple sequences in a file
# using a list
#

def example9(pathname):
    """Label fasta lines and print a list with one string per sequence.

    Trailing newlines and spaces are stripped and blank lines skipped.
    Each header line ('>' first) starts a new sequence; the data lines
    that follow are concatenated into that sequence. Every kept line is
    printed as "header <line>" or "data <line>", and the list of
    sequences is printed last.

    pathname -- path of the fasta file to read. The function previously
                took no argument yet read 'pathname', which failed with
                NameError; it now takes the path like the other examples.

    Raises ValueError when sequence data appears before any header.
    """
    per_sequence_chunks = []

    with open(pathname, "r") as infile:
        for line in infile:
            # remove the trailing '\n' and trailing spaces
            line = line.rstrip('\n ')

            # nothing is left of a blank line, so move on to the next one
            if not line:
                continue

            if line.startswith('>'):
                print("header %s" % line)
                per_sequence_chunks.append([])
            else:
                print("data %s" % line)
                if not per_sequence_chunks:
                    # There is no sequence to attach this data to yet.
                    raise ValueError(
                        "sequence data found before the first '>' header "
                        "in %s" % pathname)
                per_sequence_chunks[-1].append(line)

    # Joining once per sequence avoids quadratic string concatenation.
    sequence_data = [''.join(chunks) for chunks in per_sequence_chunks]
    print(sequence_data)

# ex. 10
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
# account for blank lines and trailing space/newlines
# collect the sequence data for multiple sequences in a file
# using a list
# making the whole thing a function

def readFastaFile(filename):
    """Read a fasta file and return its records as [header, sequence] lists.

    filename -- path of the fasta file to read.

    Returns a list with one two-element list per record, in file order:
      [0] the header line, including the leading '>', with every ','
          replaced by a space
      [1] all data lines of that record concatenated into one string
    Trailing newlines and spaces are stripped from every line and blank
    lines are skipped. An empty file gives an empty list.

    Raises ValueError when sequence data appears before any header.
    """
    records = []
    chunks = None  # data lines of the record being read; None before any header

    # 'with' closes the file even when parsing raises.
    with open(filename, "r") as infile:
        for line in infile:
            # remove the trailing '\n' and trailing spaces
            line = line.rstrip('\n ')

            # nothing is left of a blank line, so move on to the next one
            if not line:
                continue

            if line.startswith('>'):
                chunks = []
                records.append([line.replace(',', ' '), chunks])
            elif chunks is None:
                # There is no record to attach this data to yet.
                raise ValueError(
                    "sequence data found before the first '>' header "
                    "in %s" % filename)
            else:
                chunks.append(line)

    # Joining once per record avoids quadratic string concatenation.
    for record in records:
        record[1] = ''.join(record[1])

    return records

def writeSequenceToFastaFile(outfile, sequence, charsPerLine=70):
    """Write a DNA/RNA or protein sequence to 'outfile', wrapped into lines.

    outfile      -- object with a write() method, already open for writing.
    sequence     -- the sequence string to write.
    charsPerLine -- maximum number of sequence characters per line
                    (default 70).

    Every line, the last one included, ends with a newline. An empty
    sequence writes nothing. Raises ValueError when charsPerLine < 1.
    """
    if charsPerLine < 1:
        # A non-positive width would never advance through the sequence.
        raise ValueError("charsPerLine must be at least 1")

    position = 0
    while position < len(sequence):
        outfile.write(sequence[position:position + charsPerLine])
        # Without this the chunks would run together on a single line.
        outfile.write('\n')
        position += charsPerLine


def translateFastaFile(infilename, outfilename):
    """Translate every sequence of a fasta file into a new fasta file.

    Each header line of 'infilename' is copied to 'outfilename'
    unchanged. The data lines that follow it are joined, passed through
    translateDNASequence() and written wrapped by
    writeSequenceToFastaFile(). Trailing newlines and spaces are
    stripped from the input lines and blank lines are skipped. Data
    lines before the first header are translated and written without a
    header.

    NOTE(review): translateDNASequence is not defined in this file and
    must be available in the module namespace. The original docstring
    says the output is the reverse complement -- confirm against that
    function.

    Raises ValueError when both names are the same string, because
    opening the output would truncate the input before it is read.
    """
    if infilename == outfilename:
        raise ValueError("infilename and outfilename must be different files")

    # 'with' closes both files on every exit path, and only after the
    # whole input has been processed.
    with open(infilename, "r") as infile, open(outfilename, "w") as outfile:
        chunks = []  # data lines of the record currently being read

        for line in infile:
            # remove the trailing '\n' and trailing spaces
            line = line.rstrip('\n ')

            # nothing is left of a blank line, so move on to the next one
            if not line:
                continue

            if line[0] == '>':
                # A new header ends the previous record: write its sequence
                # first so it lands under its own header.
                if chunks:
                    writeSequenceToFastaFile(
                        outfile, translateDNASequence(''.join(chunks)))
                    chunks = []
                outfile.write(line + '\n')
            else:
                chunks.append(line)

        # The last record has no following header to trigger its output.
        if chunks:
            writeSequenceToFastaFile(
                outfile, translateDNASequence(''.join(chunks)))
