Primer design for HT expression of CAZymes

Open In Colab
%%capture
# Install pydna (only when running on Colab)
import sys
if 'google.colab' in sys.modules:
    %pip install pydna[clipboard,download,express,gel] teemi
    # Fetch the three input files that are read later in this notebook
    !curl -LO "https://github.com/pydna-group/pydna/raw/master/docs/notebooks/GeneByLocusTag_CAZymes.fasta"
    !curl -LO "https://github.com/pydna-group/pydna/raw/master/docs/notebooks/GeneByLocusTag_prot_CAZymes.fasta"
    !curl -LO "https://github.com/pydna-group/pydna/raw/master/docs/notebooks/SignalP_output_CAZymes.gff3"

⚠️ This notebook uses the extra dependency teemi. Run it in Google Colab, or in an environment where both pydna and teemi are installed.

Shamelessly adapted from: https://github.com/hiyama341/cazyme_primer_design/blob/main/notebooks/00-cazyme_primer_design.ipynb

In this notebook, we design a high-throughput CAZyme library by automating key steps in the primer design workflow. Specifically, we:

  1. Read nucleotide sequences from a text file

  2. Translate sequences to protein

  3. Import SignalP output to identify signal peptides

  4. Remove signal peptides from the native sequences

  5. Generate primers for downstream applications

This approach enables efficient processing of large sequence datasets and systematic design of primers for CAZymes without signal peptides. In this example we have 12 targets, but in principle there is no limit. Happy bioengineering.

Functions for the notebook

from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
import pandas as pd
from Bio.Seq import Seq
from typing import List, Dict, Union
import requests
import json
from teemi.design.combinatorial_design import simple_amplicon_maker
from pydna.design import primer_design

def open_gff3_files(path: str = "") -> List[List[str]]:
    """
    Opens and reads a GFF3 file and returns its contents as a list of lists.

    The first line of the file is treated as the header (e.g.
    ``##gff-version 3``) and is always discarded. Blank lines and any further
    comment lines starting with ``#`` are skipped, and the line terminator is
    removed before splitting, so the last column never carries a trailing
    newline.

    Parameters:
    -----------
    path: str
        The path to the GFF3 file.

    Returns:
    --------
    List[List[str]]
        One list of tab-separated column values per record line of the file.
    """
    records = []
    with open(path, "r") as infile:
        # Drop the header line; the default keeps an empty file from raising.
        next(infile, None)
        for line in infile:
            line = line.rstrip("\r\n")
            # Blank and comment lines carry no columns and would break
            # downstream code that indexes into each record.
            if not line.strip() or line.startswith("#"):
                continue
            records.append(line.split("\t"))

    return records


def tidy_up_gff(lst_of_gff: list) -> list:
    """
    Convert split GFF record lines into one dictionary per signal peptide.

    Parameters:
    lst_of_gff (list): A list of GFF lines, each already split into its columns.

    Returns:
    list_of_peptides (list): One dictionary per input line with the keys
        'gene' (first 19 characters of column 1), 'start_pos' (column 4 as
        int, minus 1), 'end_pos' (column 5 as int, plus 1) and
        'signal_peptide_likelyhood' (column 6, kept as a string).
    """
    # NOTE(review): 'end_pos' is column 5 plus one. If column 5 is the last
    # residue of the signal peptide, slicing a sequence with [end_pos * 3:]
    # removes one codon more than the peptide itself -- confirm this is intended.
    return [
        {
            "gene": columns[0][:19],
            "start_pos": int(columns[3]) - 1,
            "end_pos": int(columns[4]) + 1,
            "signal_peptide_likelyhood": columns[5],
        }
        for columns in lst_of_gff
    ]


def dict_of_signal_peptides(path: str = "") -> List[Dict[str, Union[str, int]]]:
    """
    Read a GFF3 file and return its signal-peptide records as dictionaries.

    The file is read with ``open_gff3_files`` and the resulting rows are
    converted with ``tidy_up_gff``.

    Args:
        path (str): Path to the GFF3 file. Default is an empty string.

    Returns:
        list: A list of dictionaries where each dictionary contains the following keys:
            - 'gene' (str): Gene name of the signal peptide.
            - 'start_pos' (int): Start value produced by ``tidy_up_gff``.
            - 'end_pos' (int): End value produced by ``tidy_up_gff``.
            - 'signal_peptide_likelyhood' (str): The likelihood of the sequence being a signal peptide.
    """
    return tidy_up_gff(open_gff3_files(path))


def read_gff_to_pd(path: str = "") -> pd.DataFrame:
    """
    Load the signal-peptide records of a GFF3 file into a pandas DataFrame.

    The file is read with ``open_gff3_files``, converted to dictionaries with
    ``tidy_up_gff`` and then turned into a DataFrame, one row per record.

    Parameters:
    -----------
    path : str
        The path to the GFF3 file.

    Returns:
    --------
    df : pandas.DataFrame
        A DataFrame with columns 'gene', 'start_pos', 'end_pos', and 'signal_peptide_likelyhood'.
    """
    records = tidy_up_gff(open_gff3_files(path))
    return pd.DataFrame.from_records(records)




def primer_ta_neb(primer1, primer2, conc=0.4, prodcode="phusion-0", timeout=30):
    """Calculates the annealing temperature (Ta) of a primer pair via the NEB Tm API.

    A single primer pair is posted to the NEB batch endpoint and the ``ta``
    field of the first returned row is handed back.

    Parameters
    ----------
    primer1 : str
        first primer to be used for finding the optimal ta
    primer2 : str
        second primer to be used for finding the optimal ta
    conc : float
        value sent as the ``conc`` field of the request
    prodcode : str
        find product codes on nebswebsite: https://tmapi.neb.com/docs/productcodes
    timeout : float
        seconds to wait for the NEB server before ``requests`` gives up;
        without it an unresponsive server would block forever

    Returns
    -------
    ta : int or None
        primer pair annealing temp, or None when the API reports a failure
        (the error is printed) or returns no data rows

    """

    url = "https://tmapi.neb.com/tm/batch"
    # Named ``payload`` rather than ``input`` so the builtin is not shadowed.
    payload = {"seqpairs": [[primer1, primer2]], "conc": conc, "prodcode": prodcode}
    headers = {"content-type": "application/json"}
    res = requests.post(
        url, data=json.dumps(payload), headers=headers, timeout=timeout
    )

    r = json.loads(res.content)

    if not r["success"]:
        print("request failed")
        print(r["error"][0])
        return None

    # Only one pair was submitted, so the first row is the answer.
    for row in r["data"]:
        return row["ta"]
    return None


def primer_tm_neb1(primer, conc=0.4, prodcode="phusion-0", timeout=30):
    """Calculates a single primer's melting temperature (Tm) via the NEB Tm API.

    The primer is posted on its own to the NEB batch endpoint and the ``tm1``
    field of the first returned row is handed back.

    Parameters
    ----------
    primer : str
        primer sequence to get the melting temperature for
    conc : float
        value sent as the ``conc`` field of the request
    prodcode : str
        find product codes on nebswebsite: https://tmapi.neb.com/docs/productcodes
    timeout : float
        seconds to wait for the NEB server before ``requests`` gives up;
        without it an unresponsive server would block forever

    Returns
    -------
    tm : int or None
        primer melting temperature, or None when the API reports a failure
        (the error is printed) or returns no data rows

    """

    url = "https://tmapi.neb.com/tm/batch"
    # Named ``payload`` rather than ``input`` so the builtin is not shadowed.
    payload = {"seqpairs": [[primer]], "conc": conc, "prodcode": prodcode}
    headers = {"content-type": "application/json"}
    res = requests.post(
        url, data=json.dumps(payload), headers=headers, timeout=timeout
    )

    r = json.loads(res.content)

    if not r["success"]:
        print("request failed")
        print(r["error"][0])
        return None

    # Only one primer was submitted, so the first row is the answer.
    for row in r["data"]:
        return row["tm1"]
    return None
        
        
def make_amplicons(
    list_of_amplicons: list, target_tm=58, limit=10, tm_function=primer_tm_neb1):
    """Generates pydna.amplicons which contains primers with a target temperature.

    Each template is passed to ``pydna.design.primer_design`` together with
    the given settings; the results are returned in input order.

    Parameters
    ----------
    list_of_amplicons : list
        list of pydna.Dseqrecords
    target_tm : int
        representing the target melting temperature for the primers (default=58)
    limit: int
        representing the minimum primer size (default=10)
    tm_function : function
        for calculating primer melting temperature (default=primer_tm_neb1)

    Returns:
    amplicons: list
        list of amplicon objects with designed primer sequences
    """
    return [
        primer_design(
            template,
            target_tm=target_tm,
            limit=limit,
            tm_function=tm_function,
        )
        for template in list_of_amplicons
    ]

Steps

I would like to:

  1. Read sequences from a text file

  2. Translate sequences

  3. Read in SignalP output

  4. Remove signal peptides from native sequences

  5. Generate primers

1 - Read sequences

from Bio import SeqIO

# Load every nucleotide record from the FASTA file, then echo each one.
sequences = list(SeqIO.parse("GeneByLocusTag_CAZymes.fasta", format="fasta"))
for sequence in sequences:
    print(sequence)
ID: AO090005001389
Name: AO090005001389
Description: AO090005001389
Number of features: 0
Seq('ATGGGAGCTCCCCGGCTTCGGATCAAAGGCGCAACCTTCAAAGACCCAAATAAT...TAG')
ID: AO090038000444
Name: AO090038000444
Description: AO090038000444
Number of features: 0
Seq('ATGCGTTCCTTGTCGTCTATCGCACTCCTGTCTGTGGTGGGAGCTGCGTCTGCC...TAA')
ID: AO090012000917
Name: AO090012000917
Description: AO090012000917
Number of features: 0
Seq('ATGCGGGCGTCACTACCGTTTCTTACTGCCTTGGGATGTATTCCAGCCGCTTTG...TGA')
ID: AO090005001553
Name: AO090005001553
Description: AO090005001553
Number of features: 0
Seq('ATGAAATTCCGTAACCTTTTCTTTGCTGCCGTAGCTGGCTCTGCGGTTGCTGCT...TAG')
ID: AO090011000715
Name: AO090011000715
Description: AO090011000715
Number of features: 0
Seq('ATGAGAATCGGCAACTTGATCGTGGCTGCAAGTGCTGCAAGCCTGGTGCATGCG...TAA')
ID: AO090011000757
Name: AO090011000757
Description: AO090011000757
Number of features: 0
Seq('ATGAAAGTCACCAGGCTAGCGGTTCTGAATACCCTGGCAACCCTAACTGTTGCC...TAG')
ID: AO090003000990
Name: AO090003000990
Description: AO090003000990
Number of features: 0
Seq('ATGTTTATTAAACTTTTAAACAAAGCGCTTCTAGTGCTTGGTCTGCTGTCAGCT...TAA')
ID: AO090009000373
Name: AO090009000373
Description: AO090009000373
Number of features: 0
Seq('ATGACTGACCCAAACTATACACCACCAACCGTGGAGGATATATTTCGCTACCGA...TGA')
ID: AO090012000006
Name: AO090012000006
Description: AO090012000006
Number of features: 0
Seq('ATGGCAATGCTGTTCACTCCCATTGCGGCTGCATCACTGCTAGCGGTGGTGGGC...TAA')
ID: AO090001000604
Name: AO090001000604
Description: AO090001000604
Number of features: 0
Seq('ATGCCGAGTCACTCGCGCAGTCGGGACCGCTATCGCAGCGAGCGGGATCCGAGC...TAA')
ID: AO090010000122
Name: AO090010000122
Description: AO090010000122
Number of features: 0
Seq('ATGGAGCTCTCTTCTGGATCAAGAGCGGAGCATCGCCCGATCCACGCGTGGGCC...TGA')
ID: AO090003001590
Name: AO090003001590
Description: AO090003001590
Number of features: 0
Seq('ATGCAACCATTACGGCTTTTGGCTCTCACAGCCATTCTTAAGGGCGCCTGGGCC...TGA')
# Show the first parsed nucleotide record
print(sequences[0])
ID: AO090005001389
Name: AO090005001389
Description: AO090005001389
Number of features: 0
Seq('ATGGGAGCTCCCCGGCTTCGGATCAAAGGCGCAACCTTCAAAGACCCAAATAAT...TAG')

2 - Translate sequences

# Load every protein record from the FASTA file, then echo each one.
proteins = list(SeqIO.parse("GeneByLocusTag_prot_CAZymes.fasta", format="fasta"))
for sequence in proteins:
    print(sequence)
ID: AO090005001389
Name: AO090005001389
Description: AO090005001389
Number of features: 0
Seq('MGAPRLRIKGATFKDPNNREITLRGINVAGESKYPKSPDTPSYVPDKFFETDDV...TIM')
ID: AO090038000444
Name: AO090038000444
Description: AO090038000444
Number of features: 0
Seq('MRSLSSIALLSVVGAASAQAGPWAQCGGKSFSGSSECASGWKCQELNEWFSQCV...NKN')
ID: AO090012000917
Name: AO090012000917
Description: AO090012000917
Number of features: 0
Seq('MRASLPFLTALGCIPAALAAPHPRVQSPEYVNWTTFKANGVNLGGWLVQESTID...DTK')
ID: AO090005001553
Name: AO090005001553
Description: AO090005001553
Number of features: 0
Seq('MKFRNLFFAAVAGSAVAAPLAKEQKKRDSVFQWIGANESGAEFGENNLPGVWGT...YLG')
ID: AO090011000715
Name: AO090011000715
Description: AO090011000715
Number of features: 0
Seq('MRIGNLIVAASAASLVHAYPTRDIKKRGSGFTWVGVSESGAEFGSSIPGTLGTD...QCL')
ID: AO090011000757
Name: AO090011000757
Description: AO090011000757
Number of features: 0
Seq('MKVTRLAVLNTLATLTVAWLPTTDKTITSSNGTDLFKASHGKIRGVNLGSQFVF...RSS')
ID: AO090003000990
Name: AO090003000990
Description: AO090003000990
Number of features: 0
Seq('MFIKLLNKALLVLGLLSAGTQAATIRLDPRASSFDYNGEKVRGVNLGGWLVLEP...KQC')
ID: AO090009000373
Name: AO090009000373
Description: AO090009000373
Number of features: 0
Seq('MTDPNYTPPTVEDIFRYRYQHGTNLGSMFVHGPWLSDGASSSDSGGSRELEEVK...SVH')
ID: AO090012000006
Name: AO090012000006
Description: AO090012000006
Number of features: 0
Seq('MAMLFTPIAAASLLAVVGTQPTGVFAQDAEGWYKAHPGMSRIKDVNQDTHQIVD...TRK')
ID: AO090001000604
Name: AO090001000604
Description: AO090001000604
Number of features: 0
Seq('MPSHSRSRDRYRSERDPSRRYREVYDDDDDDDFDYHPRERRRYRRDDYQHDIRS...EYY')
ID: AO090010000122
Name: AO090010000122
Description: AO090010000122
Number of features: 0
Seq('MELSSGSRAEHRPIHAWAQNVYQGRISPESRFYKNSRILKSELSFRNRQRTANN...SRK')
ID: AO090003001590
Name: AO090003001590
Description: AO090003001590
Number of features: 0
Seq('MQPLRLLALTAILKGAWALSANCTGSFDAISASDFVANINPGWNLGNSLDATPN...LTV')

3 - Read in SignalP output

# Load the SignalP GFF3 output into a DataFrame and display it
signal_pep = read_gff_to_pd('SignalP_output_CAZymes.gff3')
signal_pep
gene start_pos end_pos signal_peptide_likelyhood
0 AO090038000444 0 19 0.9994235
1 AO090012000917 0 20 0.99983215
2 AO090005001553 0 28 0.999823
3 AO090011000715 0 28 0.9998376
4 AO090011000757 0 19 0.9998413
5 AO090003000990 0 23 0.9998205
6 AO090012000006 0 27 0.99984413
7 AO090003001590 0 19 0.99982584
# Pull the gene IDs and the matching 'end_pos' values out of the SignalP table.
genes_with_signal_peptides = signal_pep['gene'].to_list()
N_pos = signal_pep['end_pos'].to_list()

my_dict = dict(name=genes_with_signal_peptides, n_pos=N_pos)
my_dict


{'name': ['AO090038000444',
  'AO090012000917',
  'AO090005001553',
  'AO090011000715',
  'AO090011000757',
  'AO090003000990',
  'AO090012000006',
  'AO090003001590'],
 'n_pos': [19, 20, 28, 28, 19, 23, 27, 19]}

4 - Remove signal peptides


clean_seq = []

# Genes listed in the SignalP table: cut N_pos codons from the 5' end and the
# last three nucleotides from the 3' end.
for gene_id, n_pos in zip(genes_with_signal_peptides, N_pos):
    clean_seq += [seq[n_pos * 3:-3] for seq in sequences if seq.id == gene_id]

# All remaining genes: cut only the first and the last three nucleotides.
clean_seq += [seq[3:-3] for seq in sequences if seq.id not in genes_with_signal_peptides]

len(clean_seq)
12
# Display the trimmed records
clean_seq
[SeqRecord(seq=Seq('GCTGGTCCTTGGGCTCAGTGTGGAGGCAAGTCCTTCTCCGGCTCATCCGAGTGT...AAC'), id='AO090038000444', name='AO090038000444', description='AO090038000444', dbxrefs=[]),
 SeqRecord(seq=Seq('CCCCATCCCCGAGTGCAGAGTCCGGAGTATGTCAACTGGACAACTTTCAAAGCC...AAG'), id='AO090012000917', name='AO090012000917', description='AO090012000917', dbxrefs=[]),
 SeqRecord(seq=Seq('TCAGTTTTCCAATGTTAGTCTGCCCTTCTTCTTCTTCTTCTTCTTCTTCTTTCT...GGG'), id='AO090005001553', name='AO090005001553', description='AO090005001553', dbxrefs=[]),
 SeqRecord(seq=Seq('TCGGGATTCACCTGTACGTCAAGTATTGGCTCTGAAGTTACTGGATCTAACGAA...CTG'), id='AO090011000715', name='AO090011000715', description='AO090011000715', dbxrefs=[]),
 SeqRecord(seq=Seq('CTTCCAACAACCGACAAGACAATCACCTCTAGTAATGGAACCGATCTCTTCAAA...AGC'), id='AO090011000757', name='AO090011000757', description='AO090011000757', dbxrefs=[]),
 SeqRecord(seq=Seq('ACTATGTCAGTATAACCGTGATGTTACCCCTTTTGCTCTGCATAGTCCCTTACT...TGC'), id='AO090003000990', name='AO090003000990', description='AO090003000990', dbxrefs=[]),
 SeqRecord(seq=Seq('GATGCCGAGGGTTGGTACAAGGCTCATCCCGGAATGTCGCGCATCAAGGATGTG...AAA'), id='AO090012000006', name='AO090012000006', description='AO090012000006', dbxrefs=[]),
 SeqRecord(seq=Seq('AGTGCCAATTGTACAGGGTCCTTTGACGCCATCTCGGCCTCTGATTTCGTCGCA...GTT'), id='AO090003001590', name='AO090003001590', description='AO090003001590', dbxrefs=[]),
 SeqRecord(seq=Seq('GGAGCTCCCCGGCTTCGGATCAAAGGCGCAACCTTCAAAGACCCAAATAATCGA...ATG'), id='AO090005001389', name='AO090005001389', description='AO090005001389', dbxrefs=[]),
 SeqRecord(seq=Seq('ACTGACCCAAACTATACACCACCAACCGTGGAGGATATATTTCGCTACCGATAT...CAT'), id='AO090009000373', name='AO090009000373', description='AO090009000373', dbxrefs=[]),
 SeqRecord(seq=Seq('CCGAGTCACTCGCGCAGTCGGGACCGCTATCGCAGCGAGCGGGATCCGAGCAGA...TAT'), id='AO090001000604', name='AO090001000604', description='AO090001000604', dbxrefs=[]),
 SeqRecord(seq=Seq('GAGCTCTCTTCTGGATCAAGAGCGGAGCATCGCCCGATCCACGCGTGGGCCCAG...AAG'), id='AO090010000122', name='AO090010000122', description='AO090010000122', dbxrefs=[])]
from pydna.dseqrecord import Dseqrecord

# Wrap every trimmed record in a pydna Dseqrecord before primer design.
clean_seq = list(map(Dseqrecord, clean_seq))
clean_seq[0].seq
Dseq(-1510)
GCTG..GAAC
CGAC..CTTG

5 - Generate primers

# Design a primer pair for every trimmed sequence.
amplicons = make_amplicons(
    clean_seq,
    target_tm=60,  # target melting temperature
    limit=15,  # minimum primer length
    tm_function=primer_tm_neb1,  # function used to calculate melting temperature
)
amplicons[0].forward_primer.seq
Seq('GCTGGTCCTTGGGCT')
# Primer sequences and identifiers as plain strings, one entry per amplicon.
forward_primer = [str(amplicon.forward_primer.seq) for amplicon in amplicons]
r_primer = [str(amplicon.reverse_primer.seq) for amplicon in amplicons]
name = [str(amplicon.name) for amplicon in amplicons]
gene = [str(record.id) for record in clean_seq]

# Melting temperature of each primer, then annealing temperature of each pair.
aneal_f = [primer_tm_neb1(primer) for primer in forward_primer]
aneal_r = [primer_tm_neb1(primer) for primer in r_primer]
ta = [primer_ta_neb(fwd, rev) for fwd, rev in zip(forward_primer, r_primer)]
ta
[61, 61, 62, 61, 61, 63, 61, 62, 63, 63, 62, 62]
# Collect the primer data in one table, one row per gene.
df = pd.DataFrame(
    dict(
        zip(
            ['gene_name (locus_tag)', 'f_seq', 'r_seq', 'f_tm', 'r_tm', 'ta'],
            [gene, forward_primer, r_primer, aneal_f, aneal_r, ta],
        )
    )
)
df
gene_name (locus_tag) f_seq r_seq f_tm r_tm ta
0 AO090038000444 GCTGGTCCTTGGGCT GTTCTTGTTGACATTTTCCATATGGT 57 60 61
1 AO090012000917 CCCCATCCCCGAGTG CTTCGTATCATGGAATCTCGTAAG 57 58 61
2 AO090005001553 TCAGTTTTCCAATGTTAGTCTGC CCCCAGGTATGCCTGC 59 58 62
3 AO090011000715 TCGGGATTCACCTGTACG CAGGCACTGGCTGTAGT 58 58 61
4 AO090011000757 CTTCCAACAACCGACAAGAC GCTGCTTCGACCTTTACAG 58 58 61
5 AO090003000990 ACTATGTCAGTATAACCGTGATGTTAC GCACTGCTTAGGGAACTCC 60 59 63
6 AO090012000006 GATGCCGAGGGTTGGT TTTACGAGTGATTGTGATAGTGATGT 58 60 61
7 AO090003001590 AGTGCCAATTGTACAGGGT AACAGTCAAAGTAAAGTTGACCG 58 59 62
8 AO090005001389 GGAGCTCCCCGGCTT CATGATAGTACAGGCTCCACGTT 60 61 63
9 AO090009000373 ACTGACCCAAACTATACACCAC ATGAACACTGTAAGTGTTGATACTAACC 59 61 63
10 AO090001000604 CCGAGTCACTCGCGC ATAATATTCCGGTAAATCCCCGAAAC 59 61 62
11 AO090010000122 GAGCTCTCTTCTGGATCAAGAG CTTACGACTGTTGATGGCCG 59 60 62
# Build "#<index>_<amplicon name>_<locus tag>" labels; the locus-tag column is
# converted to a list once instead of once per label.
locus_tags = list(df['gene_name (locus_tag)'])
amplicon_names = [
    f"#{idx}_{product_name}_{locus_tags[idx]}"
    for idx, product_name in enumerate(name)
]

# Attach the new label to each amplicon.
for amplicon, label in zip(amplicons, amplicon_names):
    amplicon.name = label

# Print each amplicon's label followed by its primer figure.
for amplicon in amplicons:
    print(amplicon.name)
    print(amplicon.figure())
    print()
#0_1510bp_PCR_prod_AO090038000444
5GCTGGTCCTTGGGCT...ACCATATGGAAAATGTCAACAAGAAC3
                   ||||||||||||||||||||||||||
                  3TGGTATACCTTTTACAGTTGTTCTTG5
5GCTGGTCCTTGGGCT3
 |||||||||||||||
3CGACCAGGAACCCGA...TGGTATACCTTTTACAGTTGTTCTTG5

#1_1116bp_PCR_prod_AO090012000917
5CCCCATCCCCGAGTG...CTTACGAGATTCCATGATACGAAG3
                   ||||||||||||||||||||||||
                  3GAATGCTCTAAGGTACTATGCTTC5
5CCCCATCCCCGAGTG3
 |||||||||||||||
3GGGGTAGGGGCTCAC...GAATGCTCTAAGGTACTATGCTTC5

#2_1279bp_PCR_prod_AO090005001553
5TCAGTTTTCCAATGTTAGTCTGC...GCAGGCATACCTGGGG3
                           ||||||||||||||||
                          3CGTCCGTATGGACCCC5
5TCAGTTTTCCAATGTTAGTCTGC3
 |||||||||||||||||||||||
3AGTCAAAAGGTTACAATCAGACG...CGTCCGTATGGACCCC5

#3_1600bp_PCR_prod_AO090011000715
5TCGGGATTCACCTGTACG...ACTACAGCCAGTGCCTG3
                      |||||||||||||||||
                     3TGATGTCGGTCACGGAC5
5TCGGGATTCACCTGTACG3
 ||||||||||||||||||
3AGCCCTAAGTGGACATGC...TGATGTCGGTCACGGAC5

#4_1266bp_PCR_prod_AO090011000757
5CTTCCAACAACCGACAAGAC...CTGTAAAGGTCGAAGCAGC3
                        |||||||||||||||||||
                       3GACATTTCCAGCTTCGTCG5
5CTTCCAACAACCGACAAGAC3
 ||||||||||||||||||||
3GAAGGTTGTTGGCTGTTCTG...GACATTTCCAGCTTCGTCG5

#5_1299bp_PCR_prod_AO090003000990
5ACTATGTCAGTATAACCGTGATGTTAC...GGAGTTCCCTAAGCAGTGC3
                               |||||||||||||||||||
                              3CCTCAAGGGATTCGTCACG5
5ACTATGTCAGTATAACCGTGATGTTAC3
 |||||||||||||||||||||||||||
3TGATACAGTCATATTGGCACTACAATG...CCTCAAGGGATTCGTCACG5

#6_1481bp_PCR_prod_AO090012000006
5GATGCCGAGGGTTGGT...ACATCACTATCACAATCACTCGTAAA3
                    ||||||||||||||||||||||||||
                   3TGTAGTGATAGTGTTAGTGAGCATTT5
5GATGCCGAGGGTTGGT3
 ||||||||||||||||
3CTACGGCTCCCAACCA...TGTAGTGATAGTGTTAGTGAGCATTT5

#7_1950bp_PCR_prod_AO090003001590
5AGTGCCAATTGTACAGGGT...CGGTCAACTTTACTTTGACTGTT3
                       |||||||||||||||||||||||
                      3GCCAGTTGAAATGAAACTGACAA5
5AGTGCCAATTGTACAGGGT3
 |||||||||||||||||||
3TCACGGTTAACATGTCCCA...GCCAGTTGAAATGAAACTGACAA5

#8_2465bp_PCR_prod_AO090005001389
5GGAGCTCCCCGGCTT...AACGTGGAGCCTGTACTATCATG3
                   |||||||||||||||||||||||
                  3TTGCACCTCGGACATGATAGTAC5
5GGAGCTCCCCGGCTT3
 |||||||||||||||
3CCTCGAGGGGCCGAA...TTGCACCTCGGACATGATAGTAC5

#9_1584bp_PCR_prod_AO090009000373
5ACTGACCCAAACTATACACCAC...GGTTAGTATCAACACTTACAGTGTTCAT3
                          ||||||||||||||||||||||||||||
                         3CCAATCATAGTTGTGAATGTCACAAGTA5
5ACTGACCCAAACTATACACCAC3
 ||||||||||||||||||||||
3TGACTGGGTTTGATATGTGGTG...CCAATCATAGTTGTGAATGTCACAAGTA5

#10_2616bp_PCR_prod_AO090001000604
5CCGAGTCACTCGCGC...GTTTCGGGGATTTACCGGAATATTAT3
                   ||||||||||||||||||||||||||
                  3CAAAGCCCCTAAATGGCCTTATAATA5
5CCGAGTCACTCGCGC3
 |||||||||||||||
3GGCTCAGTGAGCGCG...CAAAGCCCCTAAATGGCCTTATAATA5

#11_1442bp_PCR_prod_AO090010000122
5GAGCTCTCTTCTGGATCAAGAG...CGGCCATCAACAGTCGTAAG3
                          ||||||||||||||||||||
                         3GCCGGTAGTTGTCAGCATTC5
5GAGCTCTCTTCTGGATCAAGAG3
 ||||||||||||||||||||||
3CTCGAGAGAAGACCTAGTTCTC...GCCGGTAGTTGTCAGCATTC5