# wget http://cs.whitman.edu/~davisj/cs/167/2016F/exmpls/dna.py

# Code from Havill (2016), _Discovering Computer Science_
# transcribed by Andy Exley and Janet Davis

import urllib.request as web

def getFASTA(dnaid):
    """Fetch the DNA sequence with the given id from NCBI and
       return it as a string

    Parameter:
        dnaid: the identifier of a DNA sequence (a string; it is
               inserted into the request URL as-is)

    Return value: a string containing the sequence with the given id,
                  with the FASTA header line and all line breaks removed
    """

    # Request the E-utilities efetch endpoint over HTTPS.
    prefix1 = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    prefix2 = '?db=nuccore&id='
    suffix = '&rettype=fasta&retmode=text'
    url = prefix1 + prefix2 + dnaid + suffix

    # The with-statement closes the connection even if reading fails.
    with web.urlopen(url) as readpage:
        # The first line of a FASTA response is a description header,
        # not sequence data, so read it and throw it away.
        readpage.readline()
        # strip() removes '\n' or '\r\n' when present and leaves a final
        # line without a newline intact (slicing off the last character
        # unconditionally would drop a nucleotide in that case).
        pieces = [line.decode('utf-8').strip() for line in readpage]

    # Join once at the end instead of growing a string line by line.
    return ''.join(pieces)

def reverse(dna):
    """Return the reverse of a DNA sequence

    Parameter:
        dna: a string representing a DNA sequence

    Return value: the reverse of the DNA sequence
    """
    # A slice with step -1 walks the string back to front in one pass;
    # prepending one character at a time copies the whole partial result
    # on every step, which is quadratic for long sequences.
    return dna[::-1]

def complementNucleotide(nt):
    """Return the complement of a single nucleotide.

    Parameter:
        nt: a one-character string, one of 'a', 't', 'c' or 'g'
            (lowercase only)

    Return value: the complementary nucleotide, or None if nt is not
                  one of the four lowercase letters above
    """
    # Base pairing: a <-> t and c <-> g.
    pairs = {'a': 't', 't': 'a', 'c': 'g', 'g': 'c'}
    # get() yields None for anything that is not in the table.
    return pairs.get(nt)

def complement(dna):
    """Return the complement of a dna sequence

    Parameter:
        dna: a string representing a DNA sequence (any letter case)

    Return value: the complement of the DNA sequence, in lowercase.
                  Each a/t/c/g is swapped with its pairing base; any
                  other character (for example 'n' for an unknown
                  base) is copied through unchanged.
    """
    # One translation table handles the whole string in a single pass,
    # instead of indexing and concatenating one character at a time.
    pairing = str.maketrans('acgt', 'tgca')
    # Characters missing from the table are left as they are, so an
    # unexpected symbol no longer aborts the whole computation.
    return dna.lower().translate(pairing)

def reverseComplement(dna):
    """Return the reverse complement of a dna sequence

    Parameter:
        dna: a string representing a DNA sequence

    Return value: the complement of dna, read back to front
    """
    # Complement first, then reverse the complemented strand.
    complemented = complement(dna)
    return reverse(complemented)

def palindrome(dna):
    """Return True if the dna is the same as its reverse complement.
    (ex. 6.7.11, p.310)

    Parameter:
        dna: a string representing a DNA sequence

    Return value: True if dna, lowercased, equals its reverse
                  complement; False otherwise
    """
    # Lowercase the input so the comparison ignores its letter case.
    forward = dna.lower()
    backward = reverseComplement(dna)
    return forward == backward

def countCodon(dna, target):
    """Return number of target codons in dna.

    Every starting position is examined, so occurrences in all three
    reading frames are counted, including ones that overlap each other.
    The comparison is case-sensitive.

    Parameters:
        dna: a string object representing the dna sequence
        target: a three-letter string object representing the codon to
                search for

    Return value: The number of instances of the target codon in dna
                  (0 if target is empty or longer than dna)
    """
    width = len(target)
    if width == 0:
        # An empty target would "match" at every position; report none.
        return 0

    count = 0
    # Stop at the last position where a full-width window still fits.
    # An explicit loop is used rather than str.count because str.count
    # skips overlapping matches.
    for index in range(len(dna) - width + 1):
        if dna[index:index + width] == target:
            count += 1
    return count

def printCodons(dna):
    """Print the sequence of non-overlapping codons in the dna,
       one codon per line.

    The codons start at position 0. If the length of dna is not a
    multiple of three, the last line holds the one or two leftover
    characters.

    Parameters: dna: a dna sequence
    Return value: None
    """
    position = 0
    total = len(dna)
    while position < total:
        # Slicing past the end is safe: it just yields a shorter piece.
        print(dna[position:position + 3])
        position += 3

def printAllReadingFrames(dna):
    """Print sequences of non-overlapping codons in the dna,
       with all three possible reading frames.

    A header line labels the three frames (starting at offsets 0, 1
    and 2). Each following line shows the next codon of every frame
    that still has a complete codon left; incomplete codons at the end
    of a frame are not printed. For a sequence shorter than three
    characters only the header is printed.

    Parameter: dna: a dna sequence
    Return value: none
    Example: For the dna sequence "AGGCCTGGC", should print
    0   1   2
    AGG GGC GCC
    CCT CTG TGG
    GGC
    """
    # One list of complete codons per frame. Stopping at len(dna) - 2
    # guarantees every slice has exactly three characters.
    frames = []
    for offset in range(3):
        frames.append([dna[start:start + 3]
                       for start in range(offset, len(dna) - 2, 3)])

    print("0   1   2")
    # Frame 0 never has fewer codons than frames 1 and 2, so it sets
    # the number of rows, and the frames that run out are always the
    # rightmost ones -- the columns stay aligned without padding.
    for row in range(len(frames[0])):
        print(' '.join(frame[row] for frame in frames if row < len(frame)))

def CpG(dna): # Exercise 6.7.7
    """Computes the fraction of dinucleotides that are CG.

    The sequence is split into consecutive, non-overlapping pairs
    starting at position 0; a leftover single character at the end is
    ignored. The comparison is case-insensitive.

    Parameter: dna: a dna sequence
    Return value: a real number between 0 and 1 (0.0 if the sequence
                  is too short to contain any dinucleotide)
    Example: CpG("atcgttcg") = 0.5
    """
    dna = dna.lower()
    pairs = len(dna) // 2
    if pairs == 0:
        # Fewer than two characters: no dinucleotides, so avoid
        # dividing by zero.
        return 0.0

    # Find how many of the pairs are cg
    cgs = 0
    for index in range(0, len(dna) - 1, 2):
        if dna[index:index + 2] == 'cg':
            cgs += 1

    # Divide by number of dinucleotides
    return cgs / pairs

def main():
    """Print a short test sequence followed by its CpG fraction."""
    # Short built-in sequence for quick experiments. To work with the
    # full sequence instead, uncomment the getFASTA line below.
    sequence = 'AGGCCTGGCGCAATATGGTTCATGGGGTCACGAAGAGTCGGACACGACTTAACGACTAAA'
    #sequence = getFASTA('AEQU02000001')

    print(sequence)

    # Optional experiments -- uncomment any of these to try them:
    #print(complement(sequence))
    #print(reverseComplement(sequence))
    #print()
    #print(palindrome(sequence))
    #print(palindrome('gaattc'))
    #print()
    #printCodons(sequence)
    #print()
    #print(CpG("atcgttcg"))

    cpg_fraction = CpG(sequence)
    print(cpg_fraction)

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
