From 6684ffa8207696917e29e18a2414dd465e7255b9 Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Wed, 15 Jan 2025 15:34:50 +0000 Subject: [PATCH 1/8] closes #331 --- README.md | 2 +- docs/notebooks/CRISPR.ipynb | 266 +-- docs/notebooks/Dseq.ipynb | 1590 +++++++------- docs/notebooks/Dseq_Features.ipynb | 1640 +++++++------- docs/notebooks/Example_CRISPR.ipynb | 18 +- docs/notebooks/Example_Gibson.ipynb | 538 ++--- docs/notebooks/Example_Restriction.ipynb | 2032 +++++++++--------- docs/notebooks/Gibson.ipynb | 332 +-- docs/notebooks/Importing_Seqs.ipynb | 814 +++---- docs/notebooks/PCR.ipynb | 794 +++---- docs/notebooks/Restrict_Ligate_Cloning.ipynb | 642 +++--- docs/notebooks/primer_design.ipynb | 9 + docs/notebooks/readme_example.ipynb | 2 +- 13 files changed, 4344 insertions(+), 4335 deletions(-) diff --git a/README.md b/README.md index b884a43b..5def6106 100755 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ Pereira, F., Azevedo, F., Carvalho, Â., Ribeiro, G. F., Budde, M. W., & Johanss ## Documentation and usage 📚 -Full documentation of all modules and classes can be found at [https://bjornfjohansson.github.io/pydna](https://bjornfjohansson.github.io/pydna). +Full documentation of all modules and classes can be found at [https://pydna-group.github.io/pydna](https://pydna-group.github.io/pydna). To get started, we recommend you to have a look at the [example notebooks](docs/notebooks). Start by having a look at [Dseq](docs/notebooks/Dseq.ipynb), [Dseq_Features](docs/notebooks/Dseq_Features.ipynb) and [Importing_Seqs](docs/notebooks/Importing_Seqs.ipynb), which cover the basics of working with sequences. The rest of the notebooks cover how to use pydna for different cloning strategies, such as Gibson assembly, Restriction-Ligation, etc. 
diff --git a/docs/notebooks/CRISPR.ipynb b/docs/notebooks/CRISPR.ipynb index bfb5f7b2..6fdd7525 100644 --- a/docs/notebooks/CRISPR.ipynb +++ b/docs/notebooks/CRISPR.ipynb @@ -1,134 +1,134 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# How to Model CRISPR-Cas9 Experiments in pydna\n", - "\n", - "> Visit the full library documentation [here](https://bjornfjohansson.github.io/pydna/)\n", - "\n", - "The pydna package can simulate CRISPR-Cas9 editing, which allows one to cut DNA sequences at specific sites using guide RNAs (gRNAs) that direct the Cas9 protein. This page will guide you through the process of using the `pydna.crispr` module to model a CRISPR-Cas9 cut on a DNA sequence.\n", - "\n", - "The `pydna.crispr` module contains the `cas9` class to simulate the biological activites of the Cas9 protein and the guideRNA, which should be imported. In addtion, the `Dseqrecord` class has also been imported to generate an example target_sequence." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pydna.crispr import cas9, protospacer\n", - "from pydna.dseqrecord import Dseqrecord" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The target sequence and guideRNA (gRNA) sequence needs to be generated. 
Note the the sequence can be passed as a `Dseqrecord` object." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cutting with enzyme 1: (Dseqrecord(-17), Dseqrecord(-6))\n", - "protospacer: GTTACTTTACCCGACGTCCC\n", - "cutting with enzyme 2: (Dseqrecord(-17), Dseqrecord(-6))\n", - "cutting with no PAM in target: ()\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.crispr import cas9, protospacer\n", - "\n", - "# <----protospacer---><-------scaffold----------------->\n", - "guide = \"GTTACTTTACCCGACGTCCCgttttagagctagaaatagcaagttaaaataagg\"\n", - "target = \"GTTACTTTACCCGACGTCCCaGG\"\n", - "# <->\n", - "# PAM\n", - "\n", - "# Create an enzyme object with the protospacer\n", - "enzyme = cas9(\"GTTACTTTACCCGACGTCCC\")\n", - "\n", - "target_dseq = Dseqrecord(target)\n", - "\n", - "# Cut using the enzyme\n", - "print('cutting with enzyme 1:', target_dseq.cut(enzyme))\n", - "\n", - "\n", - "# Get the protospacer from the full gRNA sequence\n", - "gRNA_protospacers = protospacer(Dseqrecord(guide), cas=cas9)\n", - "# Print the protospacer (it's a list because often plasmids contain multiple gRNAs)\n", - "print('protospacer:', gRNA_protospacers[0])\n", - "gRNA_protospacer = gRNA_protospacers[0]\n", - "\n", - "# Create an enzyme from the protospacer\n", - "enzyme2 = cas9(gRNA_protospacer)\n", - "\n", - "# Simulate the cut\n", - "print('cutting with enzyme 2:', target_dseq.cut(enzyme2))\n", - "\n", - "\n", - "# Note that without the PAM, the cut will not be made.\n", - "\n", - "target_noPAM_dseq = Dseqrecord(\"GTTACTTTACCCGACGTCCCaaa\")\n", - "print(\"cutting with no PAM in target:\", target_noPAM_dseq.cut(enzyme2))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - 
"file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to Model CRISPR-Cas9 Experiments in pydna\n", + "\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "The pydna package can simulate CRISPR-Cas9 editing, which allows one to cut DNA sequences at specific sites using guide RNAs (gRNAs) that direct the Cas9 protein. This page will guide you through the process of using the `pydna.crispr` module to model a CRISPR-Cas9 cut on a DNA sequence.\n", + "\n", + "The `pydna.crispr` module contains the `cas9` class to simulate the biological activites of the Cas9 protein and the guideRNA, which should be imported. In addtion, the `Dseqrecord` class has also been imported to generate an example target_sequence." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydna.crispr import cas9, protospacer\n", + "from pydna.dseqrecord import Dseqrecord" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The target sequence and guideRNA (gRNA) sequence needs to be generated. 
Note the the sequence can be passed as a `Dseqrecord` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cutting with enzyme 1: (Dseqrecord(-17), Dseqrecord(-6))\n", + "protospacer: GTTACTTTACCCGACGTCCC\n", + "cutting with enzyme 2: (Dseqrecord(-17), Dseqrecord(-6))\n", + "cutting with no PAM in target: ()\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.crispr import cas9, protospacer\n", + "\n", + "# <----protospacer---><-------scaffold----------------->\n", + "guide = \"GTTACTTTACCCGACGTCCCgttttagagctagaaatagcaagttaaaataagg\"\n", + "target = \"GTTACTTTACCCGACGTCCCaGG\"\n", + "# <->\n", + "# PAM\n", + "\n", + "# Create an enzyme object with the protospacer\n", + "enzyme = cas9(\"GTTACTTTACCCGACGTCCC\")\n", + "\n", + "target_dseq = Dseqrecord(target)\n", + "\n", + "# Cut using the enzyme\n", + "print('cutting with enzyme 1:', target_dseq.cut(enzyme))\n", + "\n", + "\n", + "# Get the protospacer from the full gRNA sequence\n", + "gRNA_protospacers = protospacer(Dseqrecord(guide), cas=cas9)\n", + "# Print the protospacer (it's a list because often plasmids contain multiple gRNAs)\n", + "print('protospacer:', gRNA_protospacers[0])\n", + "gRNA_protospacer = gRNA_protospacers[0]\n", + "\n", + "# Create an enzyme from the protospacer\n", + "enzyme2 = cas9(gRNA_protospacer)\n", + "\n", + "# Simulate the cut\n", + "print('cutting with enzyme 2:', target_dseq.cut(enzyme2))\n", + "\n", + "\n", + "# Note that without the PAM, the cut will not be made.\n", + "\n", + "target_noPAM_dseq = Dseqrecord(\"GTTACTTTACCCGACGTCCCaaa\")\n", + "print(\"cutting with no PAM in target:\", target_noPAM_dseq.cut(enzyme2))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/Dseq.ipynb b/docs/notebooks/Dseq.ipynb index ffa836b9..ce4a1401 100644 --- a/docs/notebooks/Dseq.ipynb +++ b/docs/notebooks/Dseq.ipynb @@ -1,797 +1,797 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Representing sequences in pydna\n", - "> Visit the full library documentation [here](https://bjornfjohansson.github.io/pydna/)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Pydna contains classes to represent double stranded DNA sequences that can:\n", - "\n", - "* Be linear\n", - "* Be circular\n", - "* Contain overhangs (sticky ends).\n", - "\n", - "These sequences can be used to simulate molecular biology methods such as cloning and PCR. The main classes used to represent sequences are `Dseq` and `Dseqrecord`.\n", - "* `Dseq` represents the sequence only. Think of it as a FASTA file.\n", - "* `Dseqrecord` can contain sequence features and other info such as publication, authors, etc. 
Think of it as a Genbank file.\n", - "\n", - "> NOTE: The `Dseq` class is a subclass of biopython's `Seq`, whose documentation can be found [here](https://biopython.org/wiki/Seq). `Dseqrecord` is a subclass of biopython's `SeqRecord`, whose documentation can be found [here](https://biopython.org/wiki/SeqRecord)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Dseq Class\n", - "\n", - "We can create a `Dseq` object in different ways.\n", - "\n", - "For a linear sequence without overhangs, we create a `Dseq` object passing a string with the sequence. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "aatat\n", - "ttata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.dseq import Dseq\n", - "my_seq = Dseq(\"aatat\")\n", - "my_seq" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the console representation above, there are three lines:\n", - "1. `Dseq(-5)` indicates that the sequence is linear and has 5 basepairs.\n", - "2. `aatat`, the top / sense / watson strand, referred from now on as **watson** strand..\n", - "3. 
`ttata`, the bottom / anti-sense / crick strand, referred from now on as **crick** strand.\n", - "\n", - "Now, let's create a circular sequence:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(o5)\n", - "aatat\n", - "ttata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq(\"aatat\", circular=True)\n", - "my_seq" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> Note how `o5` indicates that the sequence is circular and has 5 basepairs.\n", - "\n", - "One way to represent a linear sequence with overhangs is to instantiate `Dseq` with the following arguments:\n", - "* The `watson` strand as a string in the 5'-3' direction.\n", - "* The `crick` strand as a string in the 5'-3' direction.\n", - "* The 5' overhang `ovhg` (overhang), which can be positive or negative, and represents the number of basepairs that the `watson` strand extends beyond the `crick` strand." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "actag\n", - " gatc" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Dseq(\"actag\", \"ctag\", -1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> Note how the bottom strand is passed in the 5'-3' direction, but it is represented in the 3'-5' direction in the console output.\n", - "\n", - "If you omit the `ovhg` argument, pydna will try to find the value that makes the `watson` and `crick` strands complementary." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "actag\n", - " gatc" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Dseq(\"actag\", \"ctag\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The best way to get a feeling for the meaning of `ovhg` is to visualise the possible scenarios as such:\n", - "\n", - "```\n", - "dsDNA overhang\n", - "\n", - " nnn... 2\n", - "nnnnn...\n", - "\n", - " nnnn... 1\n", - "nnnnn...\n", - "\n", - "nnnnn... 0\n", - "nnnnn...\n", - "\n", - "nnnnn... -1\n", - " nnnn...\n", - "\n", - "nnnnn... -2\n", - " nnn...\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Of note, the DNA sequence can be passed in both lower case and upper case, and are not restricted to the conventional ATCG nucleotides (E.g ), The class supports the IUPAC ambiguous nucleotide code." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "Actag\n", - " gatC" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Dseq(\"Actag\", \"Ctag\", -1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Another way to pass the overhangs is to use the `from_full_sequence_and_overhangs` classmethod, which only needs the `watson`/sense strand. This is useful you can only store the entire sequence (e.g. in a FASTA file), or if you want to specify overhangs on both sides of the double stranded DNA when you create the object.\n", - "\n", - "Both the `watson_ovhg` and `crick_ovhg` can be passed following the same rules as above. Specifically, the `crick_ovhg` argument is identical to the conventional `ovhg` argument. 
The `watson_ovhg` argument is the `ovhg` argument applied to the reverse complementary sequence." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-8)\n", - "aaatta\n", - " aattt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg=-3, watson_ovhg=-2)\n", - "my_seq" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A list of possible scenarios, applying positive and negative `crick_ovhg` and `watson_ovhg` to a `Dseq` object are visualised in the output of the code below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "watson_ovhg is -3, crick_ovhg is -2\n", - "Dseq(-8)\n", - "aaatt\n", - " taattt\n", - "\n", - "watson_ovhg is 3, crick_ovhg is -2\n", - "Dseq(-8)\n", - "aaattaaa\n", - " taa\n", - "\n", - "watson_ovhg is -3, crick_ovhg is 2\n", - "Dseq(-8)\n", - " att\n", - "tttaattt\n", - "\n", - "watson_ovhg is 3, crick_ovhg is 2\n", - "Dseq(-8)\n", - " attaaa\n", - "tttaa\n", - "\n" - ] - } - ], - "source": [ - "for crick_ovhg in [-2, 2]:\n", - " for watson_ovhg in [-3, 3]:\n", - " print(\"watson_ovhg is \" + str(watson_ovhg) + \", crick_ovhg is \" + str(crick_ovhg))\n", - " my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg, watson_ovhg)\n", - " print(my_seq.__repr__() + \"\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The drawing below can help visualize the meaning of the overhangs.\n", - "```\n", - " (-3)--(-2)--(-1)--(x)--(x)--(x)--(-1)--(-2)\n", - "\n", - "5'( a)--( a)--( a)--(t)--(t)--(a)--( a)--( a)3'\n", - "3'( a)--( a)--( a)--(t)--(t)--(a)--( a)--( a)5'\n", - "\n", - "5'( a)--( a)--( a)--(t)--(t)--(a)--( )--( )3'\n", - "3'( )--( )--( 
)--(t)--(t)--(a)--( a)--( a)5'\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you would like to check the overhangs for a `Dseq` object, it can be done by calling the methods `five_prime_end` and `three_prime_end` to show the 5' and 3' overhangs, respectively. An example of a `Dseq` object, and examples showing what the print-out of the methods looks like are demonstrated here:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseq(-7)\n", - "aatat\n", - " atatt\n", - "(\"5'\", 'aa')\n", - "(\"5'\", 'tt')\n" - ] - } - ], - "source": [ - "my_seq = Dseq(\"aatat\", \"ttata\", ovhg=-2)\n", - "print(my_seq.__repr__())\n", - "print(my_seq.five_prime_end())\n", - "print(my_seq.three_prime_end())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you now want to join your sequence's sticky ends to make a circular sequence (i.e Plasmid), you can use the `looped` method. The sticky ends must be compatible to do so.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(o5)\n", - "aatat\n", - "ttata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq(\"aatat\", \"ttata\", ovhg=-2)\n", - "my_seq.looped()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you want to change the circular origin of the sequence/plasmid, this can be easily done using the `shifted` method. 
This can be done by providing the number of bases between the original origin with the new origin: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(o5)\n", - "tataa\n", - "atatt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq(\"aatat\", circular=True)\n", - "my_seq.shifted(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## __getitem__, __repr__, and __str__ methods\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Slicing sequences (`__getitem__`)\n", - "\n", - "`__getitem__` is the method that is called when you use the square brackets `[]` after a python object. Below is an example of the builtin python `list`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "using square brackets: [2, 3]\n", - "is the same as using __getitem__: [2, 3]\n" - ] - } - ], - "source": [ - "my_list = [1, 2, 3]\n", - "\n", - "print('using square brackets:', my_list[1:])\n", - "print('is the same as using __getitem__:', my_list.__getitem__(slice(1, None)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `__getitem__` method is modified in pydna to deal with `Dseq` objects and returns a slice of the `Dseq` object, defined by the a start value and a stop value, similarly to string indexing. In other words, `__getitem__` indexes `Dseq`. Note that '__getitem__' (and, consequently, `[]`) uses zero-based indexing." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-3)\n", - "tat\n", - "ata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq(\"aatataa\")\n", - "my_seq[2:5]\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`__getitem__` respects overhangs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "tata\n", - "atatt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq.from_full_sequence_and_overhangs(\"aatataa\", crick_ovhg=0, watson_ovhg=-1)\n", - "my_seq[2:]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that index zero corresponds to the leftmost base of the sequence, which might not necessarily be on the `watson` strand. Let's create a sequence that has an overhang on the left side." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-10)\n", - " acgttcc\n", - "ttatgcaagg" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sequence_with_overhangs = Dseq.from_full_sequence_and_overhangs(\"aatacgttcc\", crick_ovhg=3, watson_ovhg=0)\n", - "sequence_with_overhangs" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "When we index starting from `2`, we don't start counting on the watson, but on the crick strand since it is the leftmost one." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-8)\n", - " acgttcc\n", - "atgcaagg" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sequence_with_overhangs[2:]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Slicing circular sequences\n", - "When slicing circular `Dseq` objects we get linear sequences." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-4)\n", - "atct\n", - "taga" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "circular_seq = Dseq(\"aatctaa\", circular=True)\n", - "circular_seq[1:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can slice circular sequences across the origin (where index is zero) if the first index is bigger than the second index. This is demonstrated in the example below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-4)\n", - "aaaa\n", - "tttt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "circular_seq[5:2]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Printing sequences to the console: `__repr__` and `__str__`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`__repr__` and `__str__` are methods present in all python classes that return a string representation of an object. `__str__` is called by the `print` function, and `__repr__` is used by the console or notebook output when the object is not assigned to a variable. 
Below is an example with a `date` object:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> print statement: 2023-08-15\n", - "> repr: datetime.date(2023, 8, 15)\n", - "> repr from class method: datetime.date(2023, 8, 15)\n", - "\n", - "> console output:\n" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Representing sequences in pydna\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pydna contains classes to represent double stranded DNA sequences that can:\n", + "\n", + "* Be linear\n", + "* Be circular\n", + "* Contain overhangs (sticky ends).\n", + "\n", + "These sequences can be used to simulate molecular biology methods such as cloning and PCR. The main classes used to represent sequences are `Dseq` and `Dseqrecord`.\n", + "* `Dseq` represents the sequence only. Think of it as a FASTA file.\n", + "* `Dseqrecord` can contain sequence features and other info such as publication, authors, etc. Think of it as a Genbank file.\n", + "\n", + "> NOTE: The `Dseq` class is a subclass of biopython's `Seq`, whose documentation can be found [here](https://biopython.org/wiki/Seq). 
`Dseqrecord` is a subclass of biopython's `SeqRecord`, whose documentation can be found [here](https://biopython.org/wiki/SeqRecord)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Dseq Class\n", + "\n", + "We can create a `Dseq` object in different ways.\n", + "\n", + "For a linear sequence without overhangs, we create a `Dseq` object passing a string with the sequence. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "aatat\n", + "ttata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.dseq import Dseq\n", + "my_seq = Dseq(\"aatat\")\n", + "my_seq" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the console representation above, there are three lines:\n", + "1. `Dseq(-5)` indicates that the sequence is linear and has 5 basepairs.\n", + "2. `aatat`, the top / sense / watson strand, referred from now on as **watson** strand..\n", + "3. 
`ttata`, the bottom / anti-sense / crick strand, referred from now on as **crick** strand.\n", + "\n", + "Now, let's create a circular sequence:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(o5)\n", + "aatat\n", + "ttata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq(\"aatat\", circular=True)\n", + "my_seq" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Note how `o5` indicates that the sequence is circular and has 5 basepairs.\n", + "\n", + "One way to represent a linear sequence with overhangs is to instantiate `Dseq` with the following arguments:\n", + "* The `watson` strand as a string in the 5'-3' direction.\n", + "* The `crick` strand as a string in the 5'-3' direction.\n", + "* The 5' overhang `ovhg` (overhang), which can be positive or negative, and represents the number of basepairs that the `watson` strand extends beyond the `crick` strand." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "actag\n", + " gatc" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Dseq(\"actag\", \"ctag\", -1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Note how the bottom strand is passed in the 5'-3' direction, but it is represented in the 3'-5' direction in the console output.\n", + "\n", + "If you omit the `ovhg` argument, pydna will try to find the value that makes the `watson` and `crick` strands complementary." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "actag\n", + " gatc" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Dseq(\"actag\", \"ctag\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The best way to get a feeling for the meaning of `ovhg` is to visualise the possible scenarios as such:\n", + "\n", + "```\n", + "dsDNA overhang\n", + "\n", + " nnn... 2\n", + "nnnnn...\n", + "\n", + " nnnn... 1\n", + "nnnnn...\n", + "\n", + "nnnnn... 0\n", + "nnnnn...\n", + "\n", + "nnnnn... -1\n", + " nnnn...\n", + "\n", + "nnnnn... -2\n", + " nnn...\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of note, the DNA sequence can be passed in both lower case and upper case, and are not restricted to the conventional ATCG nucleotides (E.g ), The class supports the IUPAC ambiguous nucleotide code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "Actag\n", + " gatC" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Dseq(\"Actag\", \"Ctag\", -1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another way to pass the overhangs is to use the `from_full_sequence_and_overhangs` classmethod, which only needs the `watson`/sense strand. This is useful you can only store the entire sequence (e.g. in a FASTA file), or if you want to specify overhangs on both sides of the double stranded DNA when you create the object.\n", + "\n", + "Both the `watson_ovhg` and `crick_ovhg` can be passed following the same rules as above. Specifically, the `crick_ovhg` argument is identical to the conventional `ovhg` argument. 
The `watson_ovhg` argument is the `ovhg` argument applied to the reverse complementary sequence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-8)\n", + "aaatta\n", + " aattt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg=-3, watson_ovhg=-2)\n", + "my_seq" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A list of possible scenarios, applying positive and negative `crick_ovhg` and `watson_ovhg` to a `Dseq` object are visualised in the output of the code below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "watson_ovhg is -3, crick_ovhg is -2\n", + "Dseq(-8)\n", + "aaatt\n", + " taattt\n", + "\n", + "watson_ovhg is 3, crick_ovhg is -2\n", + "Dseq(-8)\n", + "aaattaaa\n", + " taa\n", + "\n", + "watson_ovhg is -3, crick_ovhg is 2\n", + "Dseq(-8)\n", + " att\n", + "tttaattt\n", + "\n", + "watson_ovhg is 3, crick_ovhg is 2\n", + "Dseq(-8)\n", + " attaaa\n", + "tttaa\n", + "\n" + ] + } + ], + "source": [ + "for crick_ovhg in [-2, 2]:\n", + " for watson_ovhg in [-3, 3]:\n", + " print(\"watson_ovhg is \" + str(watson_ovhg) + \", crick_ovhg is \" + str(crick_ovhg))\n", + " my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg, watson_ovhg)\n", + " print(my_seq.__repr__() + \"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The drawing below can help visualize the meaning of the overhangs.\n", + "```\n", + " (-3)--(-2)--(-1)--(x)--(x)--(x)--(-1)--(-2)\n", + "\n", + "5'( a)--( a)--( a)--(t)--(t)--(a)--( a)--( a)3'\n", + "3'( a)--( a)--( a)--(t)--(t)--(a)--( a)--( a)5'\n", + "\n", + "5'( a)--( a)--( a)--(t)--(t)--(a)--( )--( )3'\n", + "3'( )--( )--( 
)--(t)--(t)--(a)--( a)--( a)5'\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you would like to check the overhangs for a `Dseq` object, it can be done by calling the methods `five_prime_end` and `three_prime_end` to show the 5' and 3' overhangs, respectively. An example of a `Dseq` object, and examples showing what the print-out of the methods looks like are demonstrated here:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseq(-7)\n", + "aatat\n", + " atatt\n", + "(\"5'\", 'aa')\n", + "(\"5'\", 'tt')\n" + ] + } + ], + "source": [ + "my_seq = Dseq(\"aatat\", \"ttata\", ovhg=-2)\n", + "print(my_seq.__repr__())\n", + "print(my_seq.five_prime_end())\n", + "print(my_seq.three_prime_end())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you now want to join your sequence's sticky ends to make a circular sequence (i.e Plasmid), you can use the `looped` method. The sticky ends must be compatible to do so.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(o5)\n", + "aatat\n", + "ttata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq(\"aatat\", \"ttata\", ovhg=-2)\n", + "my_seq.looped()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to change the circular origin of the sequence/plasmid, this can be easily done using the `shifted` method. 
This can be done by providing the number of bases between the original origin and the new origin: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(o5)\n", + "tataa\n", + "atatt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq(\"aatat\", circular=True)\n", + "my_seq.shifted(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `__getitem__`, `__repr__`, and `__str__` methods\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Slicing sequences (`__getitem__`)\n", + "\n", + "`__getitem__` is the method that is called when you use the square brackets `[]` after a python object. Below is an example of the builtin python `list`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "using square brackets: [2, 3]\n", + "is the same as using __getitem__: [2, 3]\n" + ] + } + ], + "source": [ + "my_list = [1, 2, 3]\n", + "\n", + "print('using square brackets:', my_list[1:])\n", + "print('is the same as using __getitem__:', my_list.__getitem__(slice(1, None)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `__getitem__` method is modified in pydna to deal with `Dseq` objects and returns a slice of the `Dseq` object, defined by a start value and a stop value, similarly to string indexing. In other words, `__getitem__` indexes `Dseq`. Note that `__getitem__` (and, consequently, `[]`) uses zero-based indexing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-3)\n", + "tat\n", + "ata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq(\"aatataa\")\n", + "my_seq[2:5]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`__getitem__` respects overhangs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "tata\n", + "atatt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq.from_full_sequence_and_overhangs(\"aatataa\", crick_ovhg=0, watson_ovhg=-1)\n", + "my_seq[2:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that index zero corresponds to the leftmost base of the sequence, which might not necessarily be on the `watson` strand. Let's create a sequence that has an overhang on the left side." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-10)\n", + " acgttcc\n", + "ttatgcaagg" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sequence_with_overhangs = Dseq.from_full_sequence_and_overhangs(\"aatacgttcc\", crick_ovhg=3, watson_ovhg=0)\n", + "sequence_with_overhangs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When we index starting from `2`, we don't start counting on the watson, but on the crick strand since it is the leftmost one." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-8)\n", + " acgttcc\n", + "atgcaagg" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sequence_with_overhangs[2:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Slicing circular sequences\n", + "When slicing circular `Dseq` objects we get linear sequences." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-4)\n", + "atct\n", + "taga" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circular_seq = Dseq(\"aatctaa\", circular=True)\n", + "circular_seq[1:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can slice circular sequences across the origin (where index is zero) if the first index is bigger than the second index. This is demonstrated in the example below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-4)\n", + "aaaa\n", + "tttt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circular_seq[5:2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Printing sequences to the console: `__repr__` and `__str__`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`__repr__` and `__str__` are methods present in all python classes that return a string representation of an object. `__str__` is called by the `print` function, and `__repr__` is used by the console or notebook output when the object is not assigned to a variable. 
Below is an example with a `date` object:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> print statement: 2023-08-15\n", + "> repr: datetime.date(2023, 8, 15)\n", + "> repr from class method: datetime.date(2023, 8, 15)\n", + "\n", + "> console output:\n" + ] + }, + { + "data": { + "text/plain": [ + "datetime.date(2023, 8, 15)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import datetime\n", + "\n", + "my_date = datetime.date(2023, 8, 15)\n", + "\n", + "print('> print statement:', my_date)\n", + "print('> repr:', repr(my_date))\n", + "print('> repr from class method:', my_date.__repr__())\n", + "\n", + "print()\n", + "print('> console output:')\n", + "my_date" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In a similar way, `__repr__` and `__str__` methods are used by pydna to represent sequences as strings for different purposes:\n", + "\n", + "* `__repr__` is used to make a figure-like representation that shows both strands and the overhangs.\n", + "* `__str__` is used to return the entire sequence as a string of characters (from the left-most to the right-most base of both strands), the way we would store it in a FASTA file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> figure-like representation:\n", + " Dseq(-8)\n", + "aaatta\n", + " aattt\n", + "\n", + "> string representation:\n", + " aaattaaa\n" + ] + } + ], + "source": [ + "my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg=-3, watson_ovhg=-2)\n", + "print('> figure-like representation:\\n', my_seq.__repr__())\n", + "print()\n", + "print('> string representation:\\n', my_seq)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note 
that on the string representation, the bases correspond to the entire sequence provided, even when they are only present on either the `watson` or `crick` strand. In the example above, the last two `aa` bases are missing from the `watson` strand, and only the `crick` strand has them." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Edge cases\n", + "\n", + "You can create arbitrary double-stranded sequences that are not complementary if you specify both strands and an overhang, but you won't be able to use them for molecular biology simulations. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-6)\n", + " xxxx\n", + "tata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Dseq(\"xxxx\", \"atat\", ovhg=2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } }, - { - "data": { - "text/plain": [ - "datetime.date(2023, 8, 15)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import datetime\n", - "\n", - "my_date = datetime.date(2023, 8, 15)\n", - "\n", - "print('> print statement:', my_date)\n", - "print('> repr:', repr(my_date))\n", - "print('> repr from class method:', my_date.__repr__())\n", - "\n", - "print()\n", - "print('> console output:')\n", - "my_date" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In a similar way, `__repr__` and `__str__` methods are used by pydna to represent sequences as strings for different purposes:\n", - "\n", - "* 
`__repr__` is used to make a figure-like representation that shows both strands and the overhangs.\n", - "* `__str__` is used to return the entire sequence as a string of characters (from the left-most to the right-most base of both strands), the way we would store it in a FASTA file.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> figure-like representation:\n", - " Dseq(-8)\n", - "aaatta\n", - " aattt\n", - "\n", - "> string representation:\n", - " aaattaaa\n" - ] - } - ], - "source": [ - "my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg=-3, watson_ovhg=-2)\n", - "print('> figure-like representation:\\n', my_seq.__repr__())\n", - "print()\n", - "print('> string representation:\\n', my_seq)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that on the string representation, the bases correspond to the entire sequence provided, even when they are only present on either the `watson` or `crick` strand. In the example above, the last two `aa` bases are missing from the `watson` strand, and that only the `crick` strand has them." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Edge cases\n", - "\n", - "You can create arbitrary double-stranded sequences that are not complementary if you specify both strands and an overhang, but you won't be able to use them for molecular biology simulations. 
For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-6)\n", - " xxxx\n", - "tata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Dseq(\"xxxx\", \"atat\", ovhg=2)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/Dseq_Features.ipynb b/docs/notebooks/Dseq_Features.ipynb index 90e50d14..8dd0c903 100644 --- a/docs/notebooks/Dseq_Features.ipynb +++ b/docs/notebooks/Dseq_Features.ipynb @@ -1,821 +1,821 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Working with Features using the Dseqrecord class\n", - "\n", - "> Before working with features, check how to import sequences from files in the [Importing_Seqs notebook](./Importing_Seqs.ipynb).\n", - ">\n", - "> For full library documentation, visit [here](https://bjornfjohansson.github.io/pydna/).\n", - "\n", - "Some sequence file formats (like Genbank) include features, describing key biological properties of sequence regions. 
In Genbank, features \"include genes, gene products, as well as regions of biological significance reported in the sequence.\" (See [here](https://www.ncbi.nlm.nih.gov/genbank/samplerecord/) for a description of a Genbank file and associated terminologies/annotations) Examples include coding sequences (CDS), introns, promoters, etc.\n", - "\n", - "pydna offers many ways to easily view, add, extract, and write features into a Genbank file via the `Dseqrecord` class. After reading a file into a `Dseqrecord` object, we can check out the list of features in the record using the following code. This example uses the sample record [U49845](https://www.ncbi.nlm.nih.gov/genbank/samplerecord/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: source\n", - "location: [0:5028](+)\n", - "qualifiers:\n", - " Key: chromosome, Value: ['IX']\n", - " Key: db_xref, Value: ['taxon:4932']\n", - " Key: mol_type, Value: ['genomic DNA']\n", - " Key: organism, Value: ['Saccharomyces cerevisiae']\n", - "\n", - "type: mRNA\n", - "location: [<0:>206](+)\n", - "qualifiers:\n", - " Key: product, Value: ['TCP1-beta']\n", - "\n", - "type: CDS\n", - "location: [<0:206](+)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['3']\n", - " Key: product, Value: ['TCP1-beta']\n", - " Key: 
protein_id, Value: ['AAA98665.1']\n", - " Key: translation, Value: ['SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM']\n", - "\n", - "type: gene\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - "\n", - "type: mRNA\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - " Key: product, Value: ['Axl2p']\n", - "\n", - "type: CDS\n", - "location: [686:3158](+)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['1']\n", - " Key: gene, Value: ['AXL2']\n", - " Key: note, Value: ['plasma membrane glycoprotein']\n", - " Key: product, Value: ['Axl2p']\n", - " Key: protein_id, Value: ['AAA98666.1']\n", - " Key: translation, Value: ['MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML']\n", - "\n", - "type: gene\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - "\n", - "type: mRNA\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - " Key: product, Value: ['Rev7p']\n", - "\n", - "type: CDS\n", - "location: [3299:4037](-)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['1']\n", - " Key: gene, Value: ['REV7']\n", - " Key: product, 
Value: ['Rev7p']\n", - " Key: protein_id, Value: ['AAA98667.1']\n", - " Key: translation, Value: ['MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF']\n", - "\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.parsers import parse\n", - "\n", - "#Import your file into python. \n", - "file_path = \"./U49845.gb\"\n", - "records = parse(file_path)\n", - "sample_record = records[0]\n", - "\n", - "# List all features\n", - "for feature in sample_record.features:\n", - " print(feature)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Additional ways to view and search for particular features are shown at the bottom of the page under \"Other Methods to Viewing Features\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Adding Features and Qualifiers\n", - "\n", - "To add new feature to describe a region of interest to a record, for instance a region that you would like to perform a PCR, you need to create a `SeqFeature` (sequence feature). The minimal information required is:\n", - "* A `FeatureLocation`: position of the feature in the sequence.\n", - "* The `type` of feature you want to add.\n", - "\n", - "\n", - "🚨🚨 **VERY IMPORTANT** 🚨🚨. Note that `FeatureLocation`s are like python ranges (zero-based open intervals), whereas in GenBank files, locations are one-based closed intervals. For instance, the following code adds a new feature from the 2nd to the 5th nucleotide (`FeatureLocation(3, 15)`), of the `gene` type, but in the GenBank file will be represented as `4..15`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: gene\n", - "location: [3:15]\n", - "qualifiers:\n", - "\n", - "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 4..15\n", - "ORIGIN\n", - " 1 aaaatgcgta cgtgaacgt\n", - "//\n" - ] - } - ], - "source": [ - "from Bio.SeqFeature import FeatureLocation, SeqFeature\n", - "\n", - "# Create a dummy record\n", - "dummy_record = Dseqrecord(\"aaaATGCGTACGTGAacgt\")\n", - "\n", - "# Define the locations of a CDS\n", - "location = FeatureLocation(3, 15)\n", - "\n", - "# Create a SeqFeature with the type mRNA\n", - "my_feature = SeqFeature(location=location, type=\"gene\")\n", - "\n", - "# Add my_feature to dummy_record with .append\n", - "dummy_record.features.append(my_feature)\n", - "\n", - "# Confirm that my_feature has been added\n", - "print(dummy_record.features[-1])\n", - "\n", - "# Print the feature in GenBank format (see how the location is `4..15`)\n", - "print(dummy_record.format(\"genbank\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To give further information about a feature, we can add a qualifier using the `qualifiers` property of `SeqFeature`, which contains a dictionary of qualifiers. For instance, if I would like to note a new feature of type 'domain', between 3-9 bases as my region of interest, I can instantiate the `SeqFeature` class object as such.\n", - "\n", - "> Note that a new feature is always added to the last position of the features list." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">> Feature was added:\n", - "type: domain\n", - "location: [3:9]\n", - "qualifiers:\n", - " Key: Note, Value: ['Region of interest']\n", - "\n", - "\n", - ">> GenBank format:\n", - "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 4..15\n", - " domain 4..9\n", - " /Note=\"Region of interest\"\n", - "ORIGIN\n", - " 1 aaaatgcgta cgtgaacgt\n", - "//\n" - ] - } - ], - "source": [ - "location = FeatureLocation(3, 9)\n", - "\n", - "# Create a SeqFeature with a qualifier\n", - "my_feature2 = SeqFeature(location=location, type=\"domain\", qualifiers={\"Note\": [\"Region of interest\"]})\n", - "\n", - "# Add my_feature to my_record with .append\n", - "dummy_record.features.append(my_feature2)\n", - "\n", - "# Confirm that my_feature has been added\n", - "print('>> Feature was added:')\n", - "print(dummy_record.features[-1])\n", - "print()\n", - "\n", - "# Print the feature in GenBank format\n", - "print('>> GenBank format:')\n", - "print(dummy_record.format(\"genbank\"))\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**🤔 Best practices for qualifiers:**\n", - "\n", - "The values in the `qualifiers` dictionary should be lists. The reason for this is that in a GenBank file, a single feature can have multiple values for a single qualifier. Below is a real world of the ase1 CDS example from the _S. 
pombe_ genome in EMBL format:\n", - "\n", - "```\n", - "FT CDS join(1878362..1878785,1878833..1880604)\n", - "FT /colour=2\n", - "FT /primary_name=\"ase1\"\n", - "FT /product=\"antiparallel microtubule cross-linking factor\n", - "FT Ase1\"\n", - "FT /systematic_id=\"SPAPB1A10.09\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in eukaryotes; date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in metazoa; date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in vertebrates; date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution,\n", - "FT predominantly single copy (one to one); date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in fungi; date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in eukaryotes only; date=20081110\"\n", - "```\n", - "\n", - "Note how there are several `controlled_curation` qualifiers, therefore it makes sense to store them as a list.\n", - "\n", - "By default, you can add any type of object in the qualifiers dictionary, and most things will work if you add a string. However, you risk overwriting the existing value for a qualifier, so best practice is:\n", - "1. Check if the qualifier already exists using `if \"qualifier_name\" in feature.qualifiers`\n", - "2. If it exists, append to the existing list of values using `feature.qualifiers[\"qualifier_name\"].append(\"new_value\")`\n", - "3. If it does not exist, add it to the qualifiers dictionary using `feature.qualifiers[\"qualifier_name\"] = [\"new_value\"]`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that `Bio.SeqFeatures` does not automatically assume a sequence strand for the feature. 
If you would like to refer to a feature on the positive or minus strand, you can add a parameter in `FeatureLocation` specifying `strand=+1` or `strand=-1`. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: domain\n", - "location: [15:19](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['example_domain']\n", - "\n", - "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 4..15\n", - " domain 4..9\n", - " /Note=\"Region of interest\"\n", - " domain complement(16..19)\n", - " /gene=\"example_domain\"\n", - "ORIGIN\n", - " 1 aaaatgcgta cgtgaacgt\n", - "//\n" - ] - } - ], - "source": [ - "#Create a location specifying the minus strand\n", - "location = FeatureLocation(15, 19, strand=-1)\n", - "\n", - "my_feature3 = SeqFeature(location=location, type=\"domain\", qualifiers={\"gene\":[\"example_domain\"]})\n", - "\n", - "dummy_record.features.append(my_feature3)\n", - "\n", - "print(dummy_record.features[-1])\n", - "\n", - "print(dummy_record.format(\"genbank\"))\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Adding a Feature with Parts\n", - "\n", - "To add a feature with parts, like a CDS with introns, we need to use a `CompoundLocation` object when creating a `SeqFeature`.\n", - "\n", - "The example code below adds a CDS with two parts, between 3-9bp and 12-15bp, to my features list. 
In a real-world scenario this would represent a CDS with an intron that skips the `ACG` codon: ATGCGT~~ACG~~TGA" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: CDS\n", - "location: join{[3:9], [12:15]}\n", - "qualifiers:\n", - " Key: gene, Value: ['example_gene']\n", - "\n", - "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 4..15\n", - " domain 4..9\n", - " /Note=\"Region of interest\"\n", - " domain complement(16..19)\n", - " /gene=\"example_domain\"\n", - " CDS join(4..9,13..15)\n", - " /gene=\"example_gene\"\n", - "ORIGIN\n", - " 1 aaaatgcgta cgtgaacgt\n", - "//\n" - ] - } - ], - "source": [ - "from Bio.SeqFeature import CompoundLocation\n", - "\n", - "# Define the locations of the CDS\n", - "locations = [FeatureLocation(3, 9), FeatureLocation(12, 15)]\n", - "\n", - "# Create a compound location from these parts\n", - "compound_location = CompoundLocation(locations)\n", - "\n", - "# Create a SeqFeature with this compound location, including type and qualifiers. 
\n", - "cds_feature = SeqFeature(location=compound_location, type=\"CDS\", qualifiers={\"gene\": [\"example_gene\"]})\n", - "\n", - "# Add the feature to the Dseqrecord\n", - "dummy_record.features.append(cds_feature)\n", - "\n", - "print(dummy_record.features[-1])\n", - "\n", - "print(dummy_record.format(\"genbank\"))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can even extract a protein record as follows (see how the protein sequence is `MR`, skipping the intron):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "ProteinSeq('MR')\n" - ] - } - ], - "source": [ - "sub_record = dummy_record.features[-1].extract(dummy_record)\n", - "\n", - "print(sub_record.translate())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Standard Feature Types and Qualifiers\n", - "\n", - "`pydna` and `Bio.SeqFeature` suppports all the conventional feature types through the `type` parameters. A non-exhaustive list include gene, CDS, promoter, exon, intron, 5' UTR, 3' UTR, terminator, enhancer, and RBS. You can also define custom features, which could be useful for synthetic biology applications. For instance, you might want to have Bio_brick or spacer features to describe a synthetic standardised plasmid construct. \n", - "\n", - "It is important to note that while `pydna` and `Bio.SeqFeature` does not restrict the feature types you can use, sticking to standard types helps maintain compatibility with other bioinformatics tools and databases. Please refer to the official [GenBank_Feature_Table](https://www.insdc.org/submitting-standards/feature-table/#2), that lists the standard feature types and their associated qualifiers." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Further documentation for `SeqFeature`, `CompoundLocation`, and `FeatureLocation` can be found in the `SeqFeature` module [here](https://biopython.org/docs/1.75/api/Bio.SeqFeature.html). " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Handling Origin Spanning Features\n", - "\n", - "An origin spanning feature is a special type of feature that crosses over a circular sequence's origin. In pydna, such a feature is represented as a feature with parts, joining the part of the sequence immediately before the origin and immediately after the origin. They can be added using `CompoundLocation` as normal. \n", - "\n", - "An origin spanning feature, between base 19 to base 6, in a 25bp long circular sequence, is represented like so: \n", - "\n", - "```\n", - "type: gene \n", - "location: join{[19:25](+), [0:6](+)} \n", - "qualifiers: gene, Value: example_gene \n", - "```\n", - "\n", - "This feature will be displayed as a single feature in SnapGene viewer and Benchling, since they support this convention." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">> Feature:\n", - "type: misc\n", - "location: join{[19:25], [0:6]}\n", - "qualifiers:\n", - " Key: gene, Value: ['example origin spanning gene']\n", - "\n", - ">> Feature sequence:\n", - "ATGCGTACGTGA\n", - "\n", - ">> GenBank format:\n", - "LOCUS name 25 bp DNA circular UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " misc join(20..25,1..6)\n", - " /gene=\"example origin spanning gene\"\n", - "ORIGIN\n", - " 1 acgtgaaaaa aaaaaaaaaa tgcgt\n", - "//\n" - ] - } - ], - "source": [ - "circular_record = Dseqrecord('ACGTGAaaaaaaaaaaaaaATGCGT', circular=True)\n", - "\n", - "location = [FeatureLocation(19,25), FeatureLocation(0, 6)]\n", - "ori_feat_location = CompoundLocation(location)\n", - "ori_feature = SeqFeature(location=ori_feat_location, type=\"misc\", qualifiers={\"gene\": [\"example origin spanning gene\"]})\n", - "circular_record.features.append(ori_feature)\n", - "\n", - "print('>> Feature:')\n", - "print(circular_record.features[-1])\n", - "\n", - "# Note how the feature sequence is extracted properly across the origin.\n", - "print('>> Feature sequence:')\n", - "print(circular_record.features[-1].extract(circular_record).seq)\n", - "print()\n", - "\n", - "print('>> GenBank format:')\n", - "print(circular_record.format(\"genbank\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Other Methods to Viewing Features\n", - "\n", - "pydna also provides the `list_features` method as a simple way to list all the features in a `Dseqrecord` object. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-----+------------------+-----+-------+-------+------+--------+------+\n", - "| Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |\n", - "+-----+------------------+-----+-------+-------+------+--------+------+\n", - "| 0 | nd | --> | 0 | 5028 | 5028 | source | no |\n", - "| 1 | nd | --> | <0 | >206 | 206 | mRNA | no |\n", - "| 2 | nd | --> | <0 | 206 | 206 | CDS | no |\n", - "| 3 | nd | --> | <686 | >3158 | 2472 | gene | yes |\n", - "| 4 | nd | --> | <686 | >3158 | 2472 | mRNA | yes |\n", - "| 5 | N:plasma membran | --> | 686 | 3158 | 2472 | CDS | yes |\n", - "| 6 | nd | <-- | <3299 | >4037 | 738 | gene | yes |\n", - "| 7 | nd | <-- | <3299 | >4037 | 738 | mRNA | yes |\n", - "| 8 | nd | <-- | 3299 | 4037 | 738 | CDS | yes |\n", - "+-----+------------------+-----+-------+-------+------+--------+------+\n" - ] - } - ], - "source": [ - "print(sample_record.list_features())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This method is convenient for checking-out a brief overview of each feature, without reading through an entire sequence record.\n", - "\n", - "Alternatively, we can look for specific features using their qualifiers. 
For instance:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Getting all CDS features:\n", - "type: CDS\n", - "location: [<0:206](+)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['3']\n", - " Key: product, Value: ['TCP1-beta']\n", - " Key: protein_id, Value: ['AAA98665.1']\n", - " Key: translation, Value: ['SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM']\n", - "\n", - "type: CDS\n", - "location: [686:3158](+)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['1']\n", - " Key: gene, Value: ['AXL2']\n", - " Key: note, Value: ['plasma membrane glycoprotein']\n", - " Key: product, Value: ['Axl2p']\n", - " Key: protein_id, Value: ['AAA98666.1']\n", - " Key: translation, Value: ['MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML']\n", - "\n", - "type: CDS\n", - "location: [3299:4037](-)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['1']\n", - " Key: gene, Value: ['REV7']\n", - " Key: product, Value: ['Rev7p']\n", - " Key: protein_id, Value: ['AAA98667.1']\n", - " Key: translation, Value: 
['MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF']\n", - "\n" - ] - } - ], - "source": [ - "# Filter based on feature type\n", - "print('Getting all CDS features:')\n", - "cds_features = [f for f in sample_record.features if f.type == \"CDS\"]\n", - "for feature in cds_features:\n", - " print(feature)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: gene\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - "\n" - ] - } - ], - "source": [ - "# Find a particular feature by its qualifier (e.g. gene name)\n", - "rev7_cds_feature = next(f for f in sample_record.features if \n", - " f.type == \"gene\" and\n", - " \"gene\" in f.qualifiers and \"REV7\" in f.qualifiers[\"gene\"]\n", - " )\n", - "\n", - "print(rev7_cds_feature)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you would like to search for another type of features, simply replace the `\"gene\"` with your desired feature type in quotation marks." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Removing Features\n", - "\n", - "In pydna, we can search for the feature that we would like to remove using the feature's types or qualififers. 
For instance, we can modify the features list to exclude all CDS:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: source\n", - "location: [0:5028](+)\n", - "qualifiers:\n", - " Key: chromosome, Value: ['IX']\n", - " Key: db_xref, Value: ['taxon:4932']\n", - " Key: mol_type, Value: ['genomic DNA']\n", - " Key: organism, Value: ['Saccharomyces cerevisiae']\n", - "\n", - "type: mRNA\n", - "location: [<0:>206](+)\n", - "qualifiers:\n", - " Key: product, Value: ['TCP1-beta']\n", - "\n", - "type: gene\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - "\n", - "type: mRNA\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - " Key: product, Value: ['Axl2p']\n", - "\n", - "type: gene\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - "\n", - "type: mRNA\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - " Key: product, Value: ['Rev7p']\n", - "\n" - ] - } - ], - "source": [ - "#Remove all CDS type features from my feature list\n", - "sample_record.features = [f for f in sample_record.features if not (f.type == \"CDS\")]\n", - "\n", - "for feature in sample_record.features:\n", - " print(feature)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also modify the features list to exclude a specific gene:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: source\n", - "location: [0:5028](+)\n", - "qualifiers:\n", - " Key: chromosome, Value: ['IX']\n", - " Key: db_xref, Value: ['taxon:4932']\n", - " Key: mol_type, Value: ['genomic DNA']\n", - " Key: organism, Value: ['Saccharomyces cerevisiae']\n", - "\n", - "type: mRNA\n", - "location: 
[<0:>206](+)\n", - "qualifiers:\n", - " Key: product, Value: ['TCP1-beta']\n", - "\n", - "type: gene\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - "\n", - "type: mRNA\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - " Key: product, Value: ['Axl2p']\n", - "\n" - ] - } - ], - "source": [ - "#Exclude REV7 from my feature list\n", - "sample_record.features = [f for f in sample_record.features if not ('gene' in f.qualifiers and 'REV7' in f.qualifiers['gene'])]\n", - "\n", - "for feature in sample_record.features:\n", - " print(feature)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Working with Features using the Dseqrecord class\n", + "\n", + "> Before working with features, check how to import sequences from files in the [Importing_Seqs notebook](./Importing_Seqs.ipynb).\n", + ">\n", + "> For full library documentation, visit [here](https://pydna-group.github.io/pydna/).\n", + "\n", + "Some sequence file formats (like Genbank) include features, describing key biological properties of sequence regions. 
In Genbank, features \"include genes, gene products, as well as regions of biological significance reported in the sequence.\" (See [here](https://www.ncbi.nlm.nih.gov/genbank/samplerecord/) for a description of a Genbank file and associated terminologies/annotations) Examples include coding sequences (CDS), introns, promoters, etc.\n", + "\n", + "pydna offers many ways to easily view, add, extract, and write features into a Genbank file via the `Dseqrecord` class. After reading a file into a `Dseqrecord` object, we can check out the list of features in the record using the following code. This example uses the sample record [U49845](https://www.ncbi.nlm.nih.gov/genbank/samplerecord/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: source\n", + "location: [0:5028](+)\n", + "qualifiers:\n", + " Key: chromosome, Value: ['IX']\n", + " Key: db_xref, Value: ['taxon:4932']\n", + " Key: mol_type, Value: ['genomic DNA']\n", + " Key: organism, Value: ['Saccharomyces cerevisiae']\n", + "\n", + "type: mRNA\n", + "location: [<0:>206](+)\n", + "qualifiers:\n", + " Key: product, Value: ['TCP1-beta']\n", + "\n", + "type: CDS\n", + "location: [<0:206](+)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['3']\n", + " Key: product, Value: ['TCP1-beta']\n", + " Key: 
protein_id, Value: ['AAA98665.1']\n", + " Key: translation, Value: ['SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM']\n", + "\n", + "type: gene\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + "\n", + "type: mRNA\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + " Key: product, Value: ['Axl2p']\n", + "\n", + "type: CDS\n", + "location: [686:3158](+)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['1']\n", + " Key: gene, Value: ['AXL2']\n", + " Key: note, Value: ['plasma membrane glycoprotein']\n", + " Key: product, Value: ['Axl2p']\n", + " Key: protein_id, Value: ['AAA98666.1']\n", + " Key: translation, Value: ['MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML']\n", + "\n", + "type: gene\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + "\n", + "type: mRNA\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + " Key: product, Value: ['Rev7p']\n", + "\n", + "type: CDS\n", + "location: [3299:4037](-)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['1']\n", + " Key: gene, Value: ['REV7']\n", + " Key: product, 
Value: ['Rev7p']\n", + " Key: protein_id, Value: ['AAA98667.1']\n", + " Key: translation, Value: ['MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF']\n", + "\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.parsers import parse\n", + "\n", + "#Import your file into python. \n", + "file_path = \"./U49845.gb\"\n", + "records = parse(file_path)\n", + "sample_record = records[0]\n", + "\n", + "# List all features\n", + "for feature in sample_record.features:\n", + " print(feature)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Additional ways to view and search for particular features are shown at the bottom of the page under \"Other Methods to Viewing Features\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Adding Features and Qualifiers\n", + "\n", + "To add a new feature describing a region of interest to a record, for instance a region that you would like to amplify by PCR, you need to create a `SeqFeature` (sequence feature). The minimal information required is:\n", + "* A `FeatureLocation`: position of the feature in the sequence.\n", + "* The `type` of feature you want to add.\n", + "\n", + "\n", + "🚨🚨 **VERY IMPORTANT** 🚨🚨. Note that `FeatureLocation`s are like Python ranges (zero-based, half-open intervals: the start is included and the end is excluded), whereas in GenBank files, locations are one-based closed intervals. For instance, the following code adds a new feature of the `gene` type spanning the 4th to the 15th nucleotide (`FeatureLocation(3, 15)`), which in the GenBank file is represented as `4..15`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: gene\n", + "location: [3:15]\n", + "qualifiers:\n", + "\n", + "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 4..15\n", + "ORIGIN\n", + " 1 aaaatgcgta cgtgaacgt\n", + "//\n" + ] + } + ], + "source": [ + "from Bio.SeqFeature import FeatureLocation, SeqFeature\n", + "\n", + "# Create a dummy record\n", + "dummy_record = Dseqrecord(\"aaaATGCGTACGTGAacgt\")\n", + "\n", + "# Define the location of the feature\n", + "location = FeatureLocation(3, 15)\n", + "\n", + "# Create a SeqFeature with the type gene\n", + "my_feature = SeqFeature(location=location, type=\"gene\")\n", + "\n", + "# Add my_feature to dummy_record with .append\n", + "dummy_record.features.append(my_feature)\n", + "\n", + "# Confirm that my_feature has been added\n", + "print(dummy_record.features[-1])\n", + "\n", + "# Print the feature in GenBank format (see how the location is `4..15`)\n", + "print(dummy_record.format(\"genbank\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To give further information about a feature, we can add a qualifier using the `qualifiers` property of `SeqFeature`, which contains a dictionary of qualifiers. For instance, to annotate a new feature of type 'domain' at `FeatureLocation(3, 9)` as my region of interest, I can instantiate the `SeqFeature` object with a `qualifiers` dictionary, as shown below.\n", + "\n", + "> Note that a new feature is always added to the last position of the features list." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">> Feature was added:\n", + "type: domain\n", + "location: [3:9]\n", + "qualifiers:\n", + " Key: Note, Value: ['Region of interest']\n", + "\n", + "\n", + ">> GenBank format:\n", + "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 4..15\n", + " domain 4..9\n", + " /Note=\"Region of interest\"\n", + "ORIGIN\n", + " 1 aaaatgcgta cgtgaacgt\n", + "//\n" + ] + } + ], + "source": [ + "location = FeatureLocation(3, 9)\n", + "\n", + "# Create a SeqFeature with a qualifier\n", + "my_feature2 = SeqFeature(location=location, type=\"domain\", qualifiers={\"Note\": [\"Region of interest\"]})\n", + "\n", + "# Add my_feature2 to dummy_record with .append\n", + "dummy_record.features.append(my_feature2)\n", + "\n", + "# Confirm that my_feature2 has been added\n", + "print('>> Feature was added:')\n", + "print(dummy_record.features[-1])\n", + "print()\n", + "\n", + "# Print the feature in GenBank format\n", + "print('>> GenBank format:')\n", + "print(dummy_record.format(\"genbank\"))\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**🤔 Best practices for qualifiers:**\n", + "\n", + "The values in the `qualifiers` dictionary should be lists. The reason for this is that in a GenBank file, a single feature can have multiple values for a single qualifier. Below is a real-world example, the ase1 CDS from the _S. 
pombe_ genome in EMBL format:\n", + "\n", + "```\n", + "FT CDS join(1878362..1878785,1878833..1880604)\n", + "FT /colour=2\n", + "FT /primary_name=\"ase1\"\n", + "FT /product=\"antiparallel microtubule cross-linking factor\n", + "FT Ase1\"\n", + "FT /systematic_id=\"SPAPB1A10.09\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in eukaryotes; date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in metazoa; date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in vertebrates; date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution,\n", + "FT predominantly single copy (one to one); date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in fungi; date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in eukaryotes only; date=20081110\"\n", + "```\n", + "\n", + "Note how there are several `controlled_curation` qualifiers, therefore it makes sense to store them as a list.\n", + "\n", + "By default, you can add any type of object in the qualifiers dictionary, and most things will work if you add a string. However, you risk overwriting the existing value for a qualifier, so best practice is:\n", + "1. Check if the qualifier already exists using `if \"qualifier_name\" in feature.qualifiers`\n", + "2. If it exists, append to the existing list of values using `feature.qualifiers[\"qualifier_name\"].append(\"new_value\")`\n", + "3. If it does not exist, add it to the qualifiers dictionary using `feature.qualifiers[\"qualifier_name\"] = [\"new_value\"]`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that `Bio.SeqFeatures` does not automatically assume a sequence strand for the feature. 
If you would like to refer to a feature on the positive or minus strand, you can add a parameter in `FeatureLocation` specifying `strand=+1` or `strand=-1`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: domain\n", + "location: [15:19](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['example_domain']\n", + "\n", + "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 4..15\n", + " domain 4..9\n", + " /Note=\"Region of interest\"\n", + " domain complement(16..19)\n", + " /gene=\"example_domain\"\n", + "ORIGIN\n", + " 1 aaaatgcgta cgtgaacgt\n", + "//\n" + ] + } + ], + "source": [ + "#Create a location specifying the minus strand\n", + "location = FeatureLocation(15, 19, strand=-1)\n", + "\n", + "my_feature3 = SeqFeature(location=location, type=\"domain\", qualifiers={\"gene\":[\"example_domain\"]})\n", + "\n", + "dummy_record.features.append(my_feature3)\n", + "\n", + "print(dummy_record.features[-1])\n", + "\n", + "print(dummy_record.format(\"genbank\"))\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Adding a Feature with Parts\n", + "\n", + "To add a feature with parts, like a CDS with introns, we need to use a `CompoundLocation` object when creating a `SeqFeature`.\n", + "\n", + "The example code below adds a CDS with two parts, between 3-9bp and 12-15bp, to my features list. 
In a real-world scenario this would represent a CDS with an intron that skips the `ACG` codon: ATGCGT~~ACG~~TGA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: CDS\n", + "location: join{[3:9], [12:15]}\n", + "qualifiers:\n", + " Key: gene, Value: ['example_gene']\n", + "\n", + "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 4..15\n", + " domain 4..9\n", + " /Note=\"Region of interest\"\n", + " domain complement(16..19)\n", + " /gene=\"example_domain\"\n", + " CDS join(4..9,13..15)\n", + " /gene=\"example_gene\"\n", + "ORIGIN\n", + " 1 aaaatgcgta cgtgaacgt\n", + "//\n" + ] + } + ], + "source": [ + "from Bio.SeqFeature import CompoundLocation\n", + "\n", + "# Define the locations of the CDS\n", + "locations = [FeatureLocation(3, 9), FeatureLocation(12, 15)]\n", + "\n", + "# Create a compound location from these parts\n", + "compound_location = CompoundLocation(locations)\n", + "\n", + "# Create a SeqFeature with this compound location, including type and qualifiers. 
\n", + "cds_feature = SeqFeature(location=compound_location, type=\"CDS\", qualifiers={\"gene\": [\"example_gene\"]})\n", + "\n", + "# Add the feature to the Dseqrecord\n", + "dummy_record.features.append(cds_feature)\n", + "\n", + "print(dummy_record.features[-1])\n", + "\n", + "print(dummy_record.format(\"genbank\"))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can even extract a protein record as follows (see how the protein sequence is `MR`, skipping the intron):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "ProteinSeq('MR')\n" + ] + } + ], + "source": [ + "sub_record = dummy_record.features[-1].extract(dummy_record)\n", + "\n", + "print(sub_record.translate())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Standard Feature Types and Qualifiers\n", + "\n", + "`pydna` and `Bio.SeqFeature` support all the conventional feature types through the `type` parameter. A non-exhaustive list includes gene, CDS, promoter, exon, intron, 5' UTR, 3' UTR, terminator, enhancer, and RBS. You can also define custom features, which could be useful for synthetic biology applications. For instance, you might want to have Bio_brick or spacer features to describe a synthetic standardised plasmid construct. \n", + "\n", + "It is important to note that while `pydna` and `Bio.SeqFeature` do not restrict the feature types you can use, sticking to standard types helps maintain compatibility with other bioinformatics tools and databases. Please refer to the official [GenBank_Feature_Table](https://www.insdc.org/submitting-standards/feature-table/#2), which lists the standard feature types and their associated qualifiers." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Further documentation for `SeqFeature`, `CompoundLocation`, and `FeatureLocation` can be found in the `SeqFeature` module [here](https://biopython.org/docs/1.75/api/Bio.SeqFeature.html). " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Handling Origin Spanning Features\n", + "\n", + "An origin spanning feature is a special type of feature that crosses over a circular sequence's origin. In pydna, such a feature is represented as a feature with parts, joining the part of the sequence immediately before the origin and immediately after the origin. They can be added using `CompoundLocation` as normal. \n", + "\n", + "An origin spanning feature, between base 19 to base 6, in a 25bp long circular sequence, is represented like so: \n", + "\n", + "```\n", + "type: gene \n", + "location: join{[19:25](+), [0:6](+)} \n", + "qualifiers: gene, Value: example_gene \n", + "```\n", + "\n", + "This feature will be displayed as a single feature in SnapGene viewer and Benchling, since they support this convention." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">> Feature:\n", + "type: misc\n", + "location: join{[19:25], [0:6]}\n", + "qualifiers:\n", + " Key: gene, Value: ['example origin spanning gene']\n", + "\n", + ">> Feature sequence:\n", + "ATGCGTACGTGA\n", + "\n", + ">> GenBank format:\n", + "LOCUS name 25 bp DNA circular UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " misc join(20..25,1..6)\n", + " /gene=\"example origin spanning gene\"\n", + "ORIGIN\n", + " 1 acgtgaaaaa aaaaaaaaaa tgcgt\n", + "//\n" + ] + } + ], + "source": [ + "circular_record = Dseqrecord('ACGTGAaaaaaaaaaaaaaATGCGT', circular=True)\n", + "\n", + "location = [FeatureLocation(19,25), FeatureLocation(0, 6)]\n", + "ori_feat_location = CompoundLocation(location)\n", + "ori_feature = SeqFeature(location=ori_feat_location, type=\"misc\", qualifiers={\"gene\": [\"example origin spanning gene\"]})\n", + "circular_record.features.append(ori_feature)\n", + "\n", + "print('>> Feature:')\n", + "print(circular_record.features[-1])\n", + "\n", + "# Note how the feature sequence is extracted properly across the origin.\n", + "print('>> Feature sequence:')\n", + "print(circular_record.features[-1].extract(circular_record).seq)\n", + "print()\n", + "\n", + "print('>> GenBank format:')\n", + "print(circular_record.format(\"genbank\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other Methods to Viewing Features\n", + "\n", + "pydna also provides the `list_features` method as a simple way to list all the features in a `Dseqrecord` object. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----+------------------+-----+-------+-------+------+--------+------+\n", + "| Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |\n", + "+-----+------------------+-----+-------+-------+------+--------+------+\n", + "| 0 | nd | --> | 0 | 5028 | 5028 | source | no |\n", + "| 1 | nd | --> | <0 | >206 | 206 | mRNA | no |\n", + "| 2 | nd | --> | <0 | 206 | 206 | CDS | no |\n", + "| 3 | nd | --> | <686 | >3158 | 2472 | gene | yes |\n", + "| 4 | nd | --> | <686 | >3158 | 2472 | mRNA | yes |\n", + "| 5 | N:plasma membran | --> | 686 | 3158 | 2472 | CDS | yes |\n", + "| 6 | nd | <-- | <3299 | >4037 | 738 | gene | yes |\n", + "| 7 | nd | <-- | <3299 | >4037 | 738 | mRNA | yes |\n", + "| 8 | nd | <-- | 3299 | 4037 | 738 | CDS | yes |\n", + "+-----+------------------+-----+-------+-------+------+--------+------+\n" + ] + } + ], + "source": [ + "print(sample_record.list_features())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This method is convenient for checking-out a brief overview of each feature, without reading through an entire sequence record.\n", + "\n", + "Alternatively, we can look for specific features using their qualifiers. 
For instance:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Getting all CDS features:\n", + "type: CDS\n", + "location: [<0:206](+)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['3']\n", + " Key: product, Value: ['TCP1-beta']\n", + " Key: protein_id, Value: ['AAA98665.1']\n", + " Key: translation, Value: ['SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM']\n", + "\n", + "type: CDS\n", + "location: [686:3158](+)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['1']\n", + " Key: gene, Value: ['AXL2']\n", + " Key: note, Value: ['plasma membrane glycoprotein']\n", + " Key: product, Value: ['Axl2p']\n", + " Key: protein_id, Value: ['AAA98666.1']\n", + " Key: translation, Value: ['MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML']\n", + "\n", + "type: CDS\n", + "location: [3299:4037](-)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['1']\n", + " Key: gene, Value: ['REV7']\n", + " Key: product, Value: ['Rev7p']\n", + " Key: protein_id, Value: ['AAA98667.1']\n", + " Key: translation, Value: 
['MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF']\n", + "\n" + ] + } + ], + "source": [ + "# Filter based on feature type\n", + "print('Getting all CDS features:')\n", + "cds_features = [f for f in sample_record.features if f.type == \"CDS\"]\n", + "for feature in cds_features:\n", + " print(feature)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: gene\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + "\n" + ] + } + ], + "source": [ + "# Find a particular feature by its qualifier (e.g. gene name)\n", + "rev7_cds_feature = next(f for f in sample_record.features if \n", + " f.type == \"gene\" and\n", + " \"gene\" in f.qualifiers and \"REV7\" in f.qualifiers[\"gene\"]\n", + " )\n", + "\n", + "print(rev7_cds_feature)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you would like to search for another type of feature, simply replace `\"gene\"` with your desired feature type in quotation marks." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Removing Features\n", + "\n", + "In pydna, we can search for the feature that we would like to remove using the feature's types or qualifiers. 
For instance, we can modify the features list to exclude all CDS:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: source\n", + "location: [0:5028](+)\n", + "qualifiers:\n", + " Key: chromosome, Value: ['IX']\n", + " Key: db_xref, Value: ['taxon:4932']\n", + " Key: mol_type, Value: ['genomic DNA']\n", + " Key: organism, Value: ['Saccharomyces cerevisiae']\n", + "\n", + "type: mRNA\n", + "location: [<0:>206](+)\n", + "qualifiers:\n", + " Key: product, Value: ['TCP1-beta']\n", + "\n", + "type: gene\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + "\n", + "type: mRNA\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + " Key: product, Value: ['Axl2p']\n", + "\n", + "type: gene\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + "\n", + "type: mRNA\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + " Key: product, Value: ['Rev7p']\n", + "\n" + ] + } + ], + "source": [ + "#Remove all CDS type features from my feature list\n", + "sample_record.features = [f for f in sample_record.features if not (f.type == \"CDS\")]\n", + "\n", + "for feature in sample_record.features:\n", + " print(feature)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also modify the features list to exclude a specific gene:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: source\n", + "location: [0:5028](+)\n", + "qualifiers:\n", + " Key: chromosome, Value: ['IX']\n", + " Key: db_xref, Value: ['taxon:4932']\n", + " Key: mol_type, Value: ['genomic DNA']\n", + " Key: organism, Value: ['Saccharomyces cerevisiae']\n", + "\n", + "type: mRNA\n", + "location: 
[<0:>206](+)\n", + "qualifiers:\n", + " Key: product, Value: ['TCP1-beta']\n", + "\n", + "type: gene\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + "\n", + "type: mRNA\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + " Key: product, Value: ['Axl2p']\n", + "\n" + ] + } + ], + "source": [ + "#Exclude REV7 from my feature list\n", + "sample_record.features = [f for f in sample_record.features if not ('gene' in f.qualifiers and 'REV7' in f.qualifiers['gene'])]\n", + "\n", + "for feature in sample_record.features:\n", + " print(feature)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/Example_CRISPR.ipynb b/docs/notebooks/Example_CRISPR.ipynb index 19e5cf3b..f3cb4254 100644 --- a/docs/notebooks/Example_CRISPR.ipynb +++ b/docs/notebooks/Example_CRISPR.ipynb @@ -15,6 +15,15 @@ "https://academic.oup.com/femsyr/article/doi/10.1093/femsyr/foae026/7740463?login=false " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, { "cell_type": "code", "execution_count": null, @@ -60,15 +69,6 @@ "- Let's see how this can be implemented in pydna\n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/docs/notebooks/Example_Gibson.ipynb b/docs/notebooks/Example_Gibson.ipynb index edd7f48c..490148b8 100755 --- 
a/docs/notebooks/Example_Gibson.ipynb +++ b/docs/notebooks/Example_Gibson.ipynb @@ -1,270 +1,270 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example of a Gibson Assembly in pydna\n", - "> Visit the full library documentation [here](https://bjornfjohansson.github.io/pydna/)\n", - "\n", - "This example showcases a workflow of modelling Gibson assembly to clone gene fragments into plasmids for synthetic biology. The biological example is sourced [here](https://www.nature.com/articles/nmeth.1318#MOESM319), from the original Gibson assembly paper. This example constructs a synthetic pCC1BAC plasmid by joining sequence fragments from Ruminiclostridium (Clostridium) cellulolyticum. The R. cellulolyticum fragments joined are termed F1, F2, and F3, as in the paper.\n", - "\n", - "Source files can be found alongside this notebook, if you would like to follow along. Annotations are made alongside the code to describe key steps.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Importing all necessary classes and methods\n", - "\n", - "from pydna.parsers import parse\n", - "from pydna.tm import tm_default\n", - "from pydna.amplify import pcr\n", - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.assembly import Assembly\n", - "from pydna.genbank import Genbank\n", - 
"from pydna.gel import gel\n", - "from pydna.ladders import GeneRuler_1kb_plus\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'molecule_type': 'DNA',\n", - " 'topology': 'circular',\n", - " 'data_file_division': 'BCT',\n", - " 'date': '25-AUG-2017',\n", - " 'accessions': ['CP001348', 'AAVC01000000', 'AAVC01000001-AAVC01000121'],\n", - " 'sequence_version': 1,\n", - " 'keywords': [''],\n", - " 'source': 'Ruminiclostridium cellulolyticum H10',\n", - " 'organism': 'Ruminiclostridium cellulolyticum H10',\n", - " 'taxonomy': ['Bacteria',\n", - " 'Bacillota',\n", - " 'Clostridia',\n", - " 'Eubacteriales',\n", - " 'Oscillospiraceae',\n", - " 'Ruminiclostridium'],\n", - " 'references': [Reference(title='Complete sequence of Clostridium cellulolyticum H10', ...),\n", - " Reference(title='Direct Submission', ...)],\n", - " 'comment': 'URL -- http://www.jgi.doe.gov\\nJGI Project ID: 4002584\\nSource DNA and bacteria available from Jizhong Zhou\\n(jzhou@rccc.ou.edu)\\nContacts: Jizhong Zhou (jzhou@rccc.ou.edu)\\n David Bruce (microbe@cuba.jgi-psf.org)\\nAnnotation done by JGI-ORNL and JGI-PGF\\nFinishing done by JGI-LANL\\nFinished microbial genomes have been curated to close all gaps with\\ngreater than 98% coverage of at least two independent clones. Each\\nbase pair has a minimum q (quality) value of 30 and the total error\\nrate is less than one per 50000.\\nThe JGI and collaborators endorse the principles for the\\ndistribution and use of large scale sequencing data adopted by the\\nlarger genome sequencing community and urge users of this data to\\nfollow them. 
it is our intention to publish the work of this\\nproject in a timely fashion and we welcome collaborative\\ninteraction on the project and analysis.\\n(http://www.genome.gov/page.cfm?pageID=10506376).'}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Reading the R. cellulolyticum genome from GenBank\n", - "gb = Genbank(\"example@example.com\")\n", - "genome = gb.nucleotide(\"CP001348.1\")\n", - "# Print the info of the genome\n", - "genome.annotations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'molecule_type': 'DNA',\n", - " 'topology': 'circular',\n", - " 'data_file_division': 'SYN',\n", - " 'date': '29-AUG-2024',\n", - " 'accessions': ['.'],\n", - " 'keywords': [''],\n", - " 'source': 'synthetic DNA construct',\n", - " 'organism': 'synthetic DNA construct',\n", - " 'taxonomy': [],\n", - " 'references': [Reference(title='Direct Submission', ...),\n", - " Reference(title='Direct Submission', ...),\n", - " Reference(title='Direct Submission', ...),\n", - " Reference(title='Direct Submission', ...)],\n", - " 'comment': 'SGRef: number: 1; type: \"Journal Article\"; journalName: \"Submitted\\n(23-AUG-2007) 726 Post Road, Madison, WI 53713, USA\"\\nSGRef: number: 2; type: \"Journal Article\"\\nSGRef: number: 3; type: \"Journal Article\"'}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Reading the plasmid\n", - "vector = parse(\"./pCC1BAC.gb\")[0]\n", - "vector.annotations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Importing pre-designed primers for the PylRS insert fragment. 
\n", - "\n", - "F1_For = \"GCAGCTTCAAGTCCTGCAAACAAGGTGTACCAGGATCGTT\"\n", - "F1_Rev = \"GATTTCAGTGTAGTTAGGGCCAGTTGAATTCAAACCTGCC\"\n", - "F2_For = \"GGCAGGTTTGAATTCAACTGGCCCTAACTACACTGAAATC\"\n", - "F2_Rev = \"CTTGGTGCCATCAGCATTGTTCTCTGTACCGCCCACTGTC\"\n", - "F3_For = \"GACAGTGGGCGGTACAGAGAACAATGCTGATGGCACCAAG\"\n", - "F3_Rev = \"CAGTTGAATAATCATGTGTTCCTGCGGCAAATGCAGTACC\"\n", - "BACF1_For = \"AACGATCCTGGTACACCTTGTTTGCAGGACTTGAAGCTGCgcggccgcgatcctctagagtcgacctg\"\n", - "BACF3_Rev = \"GGTACTGCATTTGCCGCAGGAACACATGATTATTCAACTGgcggccgccgggtaccgagctcgaattc\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5210\n", - "5384\n", - "5172\n", - "8221\n" - ] - } - ], - "source": [ - "# Getting the PCR products from the genome (might take a while since the genome is large)\n", - "\n", - "pcr_product_F1 = pcr(F1_For, F1_Rev, genome, limit=20)\n", - "pcr_product_F2 = pcr(F2_For, F2_Rev, genome, limit=20)\n", - "pcr_product_F3 = pcr(F3_For, F3_Rev, genome, limit=20)\n", - "pcr_product_BAC = pcr(BACF1_For, BACF3_Rev, vector, limit=20)\n", - "\n", - "# Printing out the PCR fragment sizes\n", - "print(len(pcr_product_F1))\n", - "print(len(pcr_product_F2))\n", - "print(len(pcr_product_F3))\n", - "print(len(pcr_product_BAC))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "image/jpeg": 
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAFoAlgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD5/ooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK6/wCH3hWx8WeIrfT7+W4jhlcKWgZQ36g1yFel/Bf/AJHey/66D+dAGH8QfCtj4T8RXGn2EtxJDE5UNOylv0ArkK9L+NH/ACO97/10P8680oAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK9L+C/wDyO9l/10H8680r0v4L/wDI72X/AF0H86AD40f8jve/9dD/ADrzSvS/jR/yO97/ANdD/OvNKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACvS/gv/wAjvZf9dB/OvNK9L+C//I72X/XQfzoAPjR/yO97/wBdD/OvNK9L+NH/ACO97/10P8680oAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK7n4Y+INL8PeKbW91W6+z26OCz+Wz4H0UE1w1FAHc/E7xBpfiHxTdXulXX2i3dyVfy2TI+jAGuGoooAKKKKACinxQyzuEijeRz0VFJJq7faDrOmI
j6hpN/aJINyNcWzxhh6jIGaAM+iiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8AIat/98V7X8b/APkCaN/16JXinhj/AJDVv/viva/jf/yBNG/69EoA+fD1NJSnqaSgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8hq3/3xXtfxv/5Amjf9eiV4p4Y/5DVv/viva/jf/wAgTRv+vRKAPnw9TSUp6mkoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooA2fDH/Iat/8AfFe1/G//AJAmjf8AXoleKeGP+Q1b/wC+K9r+N/8AyBNG/wCvRKAPnw9TSUp6mkoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAoooHWgCa1u57OdZrd9kinIOAf51t65448R+JIIYdW1H7RHAgSMeRGm1R2+VRXY/Dn4baN4v07ULjULm/ie2gaRBbyIoJHrlTXnusWMWn6jLbxM7IjEAuQTQBn0UUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUDrRQOtAH0H8EP8AkB6z/wBej14r4n/5Ddx/vmvavgh/yA9Z/wCvR68V8T/8hu4/3zQBjUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUDrRQOtAH0H8EP+QHrP/Xo9eK+J/wDkN3H++a9q+CH/ACA9Z/69HrxXxP8A8hu4/wB80AY1FFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFA60UDrQB9B/BD/kB6z/16PXivif/AJDdx/vmvSvhZ448OeG9K1OHVtR+zyT27JGPIkfcx7fKpry/XbuC81Saa3ffGzEg4I/nQBmUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFF
FABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAPihlncJFG8jnoqKSTV2+0HWdMRH1DSb+0SQbka4tnjDD1GQM1Z8Mf8hq3/3xXtfxv/5Amjf9eiUAfPdFKeppKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKANnwx/yGrf/AHxXtfxv/wCQJo3/AF6JXinhj/kNW/8Aviva/jf/AMgTRv8Ar0SgD58PU0lKeppKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKANnwx/yGrf8A3xXtfxv/AOQJo3/XoleKeGP+Q1b/AO+K9r+N/wDyBNG/69EoA+fD1NJSnqaSgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8AIat/98V7X8b/APkCaN/16JXgNrdz2c6zW77JFOQcA/zrb1zxx4j8SQQw6tqP2iOBAkY8iNNqjt8qigDnz1NJRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAKBk4qaeFYgpUnkZ5qFfvCrV391PpQBUqWCMSyBWJx7VFViz/wBePrQAyeMRSFVJx71FVi8/15+tV6ACrVtbJMrFiwwM8VVq/Y/6t/8AdoApONrECm06T/WGm0AFdV4b8NWesW1xJcSzqY0LDy2A5/EGuVr0PwN/x43v/XE0AcJeQLb3LxoSQDjmq9XNU/4/pP8AeNU6AJ7aFZpQrEgH0ouYVhlKqSQPWpLD/Xr9aL//AF7fWgCpRRRQBLBGJJApJx7VLeWyW74QsfrTLT/XD61a1P8A1n4UAZ1FFFABRRRQAVLbxLLIFYkA+lRVZs/9ev1oAfe2kds+ELH6mqdaeq/638KzKACiiigB0ahnAPevR9C+Huk6n4XutTmuL1Z4QCqo6BT9crn9a85h/wBav1r3Lwj/AMk/1H/dFAHid9bpbXLxoWIBxzVar+rf8f0n1qhQAUUUUAFFFFABWppemw3r7ZWkA/2SP8Ky66Dw/wD638KAMm9tktpiiFiB61VrQ1T/AI+W+tZ9ABRRRQAUUUUAaWl6fFezKkjOAT/CR/hRqmnxWUzJGzkA/wARH+FWvD//AB9J9aPEH/H0/wBaAMOiiigAooooA6zwZ4XsvEWpxW13LcIjsATCyg/qDXT/ABJ+GujeDrjy9Pub+UbQf9IkRu3soqn8Lv8AkPW/+8K7746f8fo/3B/KgD57YYYihRlgKV/vmhPvCgDodK0C1voHeWSYFVyNrD/Cse9tUtpWRCxA9a63w7/x6Tf7hrmNV/4+W+tAGdSqMsAaSnR/fFA
HS6R4cs7+3eSWScFVyNjAf0rF1C0jtZmRCxA/vGuy8Nf8eUv+4a5TWf8Aj6f60AZdFFFAGhplhFeTKkjOAT/CRW/rXhWx062jkhluGLLk72U/yFZWg/8AH0n1rsfFP/HjD/uCgDzaRQrkCm0+b/WGmUAKo3MAe9dt4V8Hafrk6JczXSBjz5TKP5qa4qP/AFi/WvVvh5/x9RfUUAc94q8Hafoc7pbTXThTx5rKf5KK4lhtYgdq9X+If/H1L9TXlMn+sb60ANooooAKKKKAFX7wq1d/dT6VVX7wq1d/dT6UAVKsWf8Arx9ar1Ys/wDXj60AF5/rz9ar1YvP9efrVegAq/Y/6t/92qFX7H/Vv/u0AUpP9YabTpP9YabQAV6H4G/48b3/AK4mvPK9D8Df8eN7/wBcTQBxGqf8f0n+8ap1c1T/AI/pP941ToAt2H+vX60X/wDr2+tFh/r1+tF//r2+tAFSiiigCe0/1w+tWtT/ANZ+FVbT/XD61a1P/WfhQBnUUUUAFFFFABVmz/16/Wq1WbP/AF6/WgC1qv8ArfwrMrT1X/W/hWZQAUUUUAPh/wBav1r3Lwj/AMk/1H/dFeGw/wCtX617l4R/5J/qP+6KAPGdW/4/pPrVCr+rf8f0n1qhQAUUUUAFFFFABXQeH/8AW/hXP10Hh/8A1v4UAZ+qf8fLfWs+tDVP+PlvrWfQAUUUUAFFFFAG54f/AOPpPrR4g/4+n+tHh/8A4+k+tHiD/j6f60AYdFFFABRRRQB6P8Lv+Q9b/wC8K7746f8AH6P9wfyrgfhd/wAh63/3hXffHT/j9H+4P5UAfPj/AHzQn3hQ/wB80J94UAdt4d/49Jv9w1zGq/8AHy31rp/Dv/HpN/uGuY1X/j5b60AZ1Oj++KbTo/vigDvfDX/HlL/uGuU1n/j6f611fhr/AI8pf9w1yms/8fT/AFoAy6KKKANrQf8Aj6T612Pin/jxh/3BXHaD/wAfSfWux8U/8eMP+4KAPN5v9YaZT5v9YaZQA6P/AFi/WvVvh5/x9RfUV5TH/rF+terfDz/j6i+ooAPiH/x9S/U15TJ/rG+terfEP/j6l+prymT/AFjfWgBtFFFABRRRQAq/eFWrv7qfSqq/eFWrv7qfSgCpViz/ANePrVerFn/rx9aAC8/15+tV6sXn+vP1qvQAVfsf9W/+7VCr9j/q3/3aAKUn+sNNp0n+sNNoAK9D8Df8eN7/ANcTXnleh+Bv+PG9/wCuJoA4jVP+P6T/AHjVOrmqf8f0n+8ap0AW7D/Xr9aL/wD17fWiw/16/Wi//wBe31oAqUUUUAT2n+uH1q1qf+s/Cqtp/rh9atan/rPwoAzqKKKACiiigAqzZ/69frVarNn/AK9frQBa1X/W/hWZWnqv+t/CsygAooooAfD/AK1frXuXhH/kn+o/7orw2H/Wr9a9y8I/8k/1H/dFAHjOrf8AH9J9aoVf1b/j+k+tUKACiiigAooooAK6Dw//AK38K5+ug8P/AOt/CgDP1T/j5b61n1oap/x8t9az6ACiiigAooooA3PD/wDx9J9aPEH/AB9P9aPD/wDx9J9aPEH/AB9P9aAMOiiigAooooA9H+F3/Iet/wDeFd98dP8Aj9H+4P5VwPwu/wCQ9b/7wrvvjp/x+j/cH8qAPnx/vmhPvCh/vmhPvCgDtvDv/HpN/uGuY1X/AI+W+tdP4d/49Jv9w1zGq/8AHy31oAzqdH98U2nR/fFAHe+Gv+PKX/cNcprP/H0/1rq/DX/HlL/uGuU1n/j6f60AZdFFFAG1oP8Ax9J9a7HxT/x4w/7grjtB/wCPpPrXY+Kf+PGH/cFAHm83+sNMp83+sNMoAdH/AKxfrXq3w8/4+ovqK8pj/wBYv1r1b4ef8fUX1FAB8Q/+PqX6mvKZP9Y31r1b4h/8fUv1NeUyf6xvrQA2iiigAooooAVThhU9xKjqu05wPSq9FABU1s6xyhmOBUNFAE1y6ySllORUNFFABVu1njj
Rg7YJHpVSigBzkFyR0ptFFABXZeFNd03TbS6S7ufLZ4yqjYxyfwFcbRQBZv5UmuneNsqTwcVWoooAsWkqRTBnOB9KLuVJZiyHI+lV6KACiiigCW3dUlBY4FWL6eOZ8xtkfQ1SooAKKKKACiiigAqa2kWOUFjgVDRQBf1C4inkzG+4fQiqFFFABRRRQA+NgrgnpmvVfDvjLQLDwfeWFzf7LmRQETyZDn8QuK8nooAt6hNHPdO8bblJ4OMVUoooAKKKKACiiigArY0e+trWTM0m0Y/uk/yrHooAuX88c87NG24E+mKp0UUAFFFFABRRRQBq6PeW9rcK00m1QeuCf5UaxeW91cM0Mm5SeuCP51lUUAFFFFABRRRQB2vgLXtM0XV4Z9QufJjVgS3ls38ga6/4r+OfDniW6D6RqP2ldoGfIkTt/tKK8booAViCxIoU4YE0lFAHVaLrFhaW8iTz7GK4A2Mf5CsLUJ4p52aNtwJ9CKpUUAFOQgMCabRQB1+h63p1nbSJPcbGKkAbGP8AIVz+p3MVxOzRPuBPoRVCigAooooA1NJu4LWdWmfaAeuCf5V0uv8AiDS720iS3ut7KgBHlsP5iuGooAfIwZyR0plFFADkIDgnpXoHg3xNpGk3EbXt35Sg8ny3b+QNee0UAeheMvE2katcSNZXfmqTwfLdf5gV5+5BckdKbRQAUUUUAFFFFABUzJbiyidZWNyZHEke3hUAXac9ySX4/wBketP057WPU7R76J5bRZkM8afeePcNwHI5IzXoGraZcWWi3V3qttYvpzwuLdbfw/JayLIVIjPmmFAAG2k5dsgEc5oA82oore8JJBNq7wPAstzLCVtDJatcokuVOWjUEsNocfdbkg4OKAMe6S3SVRbStJH5cZLMuCHKAuPoG3DPfFQ12njGyk07ToYNUgtv7TeYNFLa6S9ioiAbcGDRRbiSUx8pxg881xdABRRVmx06+1S4+z6fZXF3PjPl28TSNj1wATQBXZWRyjqVZTggjBBpKvaxb6pb6nM2sW1zb3szGWRbmIxuxYkk4IHU5qjQAUUVZsdOvtUuPs+n2Vxdz4z5dvE0jY9cAE0AV2Vkco6lWU4IIwQaSr2sW+qW+pzNrFtc297MxlkW5iMbsWJJOCB1Oao0AFKVZQpKkBhlSR1GccfkaWOOSaVYokZ5HIVVUZLE9gK09V0rXrC2tTq2mX9rBGnlQG5tmjGCzPgEgZ5Zj+NAGVRRRQAUu1ggcqdpJAOOCR1/mPzpOprXvNG8Q2ekRNe6VqMGno7SJJNaukYZwoJ3EY5Cr37UAZFFFFABRRRQAUUU6OOSaVYokZ5HIVVUZLE9gKAEKsoUlSAwypI6jOOPyNJWrqula9YW1qdW0y/tYI08qA3Ns0YwWZ8AkDPLMfxrKoAKKKKAClVWdwiKWZjgADJJpK0tFsNZu75J9Esby5ubZ1lU2sDSmNgcg4APcd6AM2iprq0uLK5e3u7eW3nQ4eKVCjL9QeRUNABRRRQAUUUUAFFFWbHTr7VLj7Pp9lcXc+M+XbxNI2PXABNAFdlZHKOpVlOCCMEGkq9rFvqlvqczaxbXNvezMZZFuYjG7FiSTggdTmqNABRRRQAUUUUAFFFTTpbrFbGGVnkaMmZSuAj72AA9RtCnPufSgCGiiigAooooAKKKmnS3WK2MMrPI0ZMylcBH3sAB6jaFOfc+lAENFFFABRU1mlvJewJdytFbNIolkVdxRM8kDuQM8VDQAUUUUAFFTWaW8l7Al3K0Vs0iiWRV3FEzyQO5AzxUNABRRRQAUUVMyW4sonWVjcmRxJHt4VAF2nPckl+P9ketAENFFFABRRRQAUUrKyHDKVOAcEY4IyKSgAooooAKKKKAL+iPdR6/pz2MSy3i3URgjf7rybhtB5HBOO9dlJHpMEGoz6JcQ3OqS2swniOpyygRlG8wqGt4w5C7iP3jdM84ritJTzdZsY/tf2PdcRr9pzjycsPnzkdOvXtXomqLfS6
TfrNo+qaPGLeRjqU9vbRpcYUnYXSBGO/G3iRslu4oA8vroPCTMLy+RxGLKS0K3kjzmHy4t6chlDEHdsGArZzjBzXP10fgtJW1edoXnaRLdmFrAEL3fzKPKAcMp67sFW+50JoAueIEsbfw35GhyR3GmG7Rp5ftjzOkux9gw8MW0Eb+Qpzt68VyFdz4yS5bRIpLjT73Rdtwqrp11DDF5uVbMoWOKLO3GCSp++MHqK4agArqPCohk07U4NRaOLSXeHz5muWhZZAH2KCschbI3nGw/dzxiuXrrvBCT7dQkt4bq/dfLU6ZbRxSNcA7vnKyJICEx/cY/OOnNAEPioQx6dpkGnNHLpKPN5Ey3LTM0hCb1JaOMrgbDjYPvZ5zXL113jdJ9mnyXEN1YO3mKNMuY4o2twNvzhY0jAD5/uKfkPXiuRoAK6jwqIZNO1ODUWji0l3h8+ZrloWWQB9igrHIWyN5xsP3c8Yrl667wQk+3UJLeG6v3Xy1OmW0cUjXAO75ysiSAhMf3GPzjpzQBD4qEMenaZBpzRy6SjzeRMty0zNIQm9SWjjK4Gw42D72ec1y9dd43SfZp8lxDdWDt5ijTLmOKNrcDb84WNIwA+f7in5D14rkaANjwtJcR+IYGtkjd9kofzJDGojMbCRtw5XCbjkdMdD0rf1KLS7Xw9qa+Hp4rlJUT7bm+klaNBIhDKrQQj7+wZ+bGe2c1geFkaTxHaKl09s43srxlQzEIx2Ddxl8bOePm5z0rrPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/8AiHynpwaAPO6KKKAL+iPdR6/pz2MSy3i3URgjf7rybhtB5HBOO9dlJHpMEGoz6JcQ3OqS2swniOpyygRlG8wqGt4w5C7iP3jdM84ritJTzdZsY/tf2PdcRr9pzjycsPnzkdOvXtXomqLfS6TfrNo+qaPGLeRjqU9vbRpcYUnYXSBGO/G3iRslu4oA8vooooAKKKKACtjwtJcR+IYGtkjd9kofzJDGojMbCRtw5XCbjkdMdD0rHrY8LI0niO0VLp7ZxvZXjKhmIRjsG7jL42c8fNznpQBv6lFpdr4e1NfD08VykqJ9tzfSStGgkQhlVoIR9/YM/NjPbOa4ivRPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/+IfKenBrzugAooooAK6/w+ljceG/I1ySO30wXbtBL9seF3l2JvGEhl3ADZyVGN3XmuQruPB1rey6fAbeVJrZ7m4NzayQQThdkSMhVJVYKzklN2McD0oAx/FrMbyxRBGbKO0C2ciTmbzIt78lmVSTu3jBVcYxgYrn66Hxe5m1Czud8wFxaJKtvMULWylmAT5FVQCAHACrw447nnqACiiigAooooAK6jwqIZNO1ODUWji0l3h8+ZrloWWQB9igrHIWyN5xsP3c8Yrl667wQk+3UJLeG6v3Xy1OmW0cUjXAO75ysiSAhMf3GPzjpzQBD4qEMenaZBpzRy6SjzeRMty0zNIQm9SWjjK4Gw42D72ec1y9dd43SfZp8lxDdWDt5ijTLmOKNrcDb84WNIwA+f7in5D14rkaACiiigAooooAtadYTanepawFFdgzFpG2qiqpZmJ9AoJ/Cr91oCxWU91Z6vp2opbgNOtt5qtGpYKGxIiZG5lHy56ik8LI0niO0VLp7ZxvZXjKhmIRjsG7jL42c8fNznpXWeJVvJPDl41xpGo6CibGCXNvbwreneo2/u4IixGd/8Q+U9ODQB53RRRQAUUUUAWtOsJtTvUtYCiuwZi0jbVRVUszE+gUE/hV+60BYrKe6s9X07UUtwGnW281WjUsFDYkRMjcyj5c9RSeFkaTxHaKl09s43srxlQzEIx2Ddxl8bOePm5z0rrPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/8AiHynpwaAPO6KKKANXT9E+2WX2251Ky0+1MhiSS6Mh8x1ALALGjNwGXJxjkc1BqemSaX
NErTQXEU0fmwzwMSkiZK5GQCOVYYIB4rq/BqXK6JLJb6fe61uuGVtOtYYZfKwq4lKyRS43ZwCFH3Dk9BWT40SVdXgaZ51ke3VjazhA9p8zDyiECqOm7AVfv8AQGgDnKKKKANXT9E+2WX2251Ky0+1MhiSS6Mh8x1ALALGjNwGXJxjkc1BqemSaXNErTQXEU0fmwzwMSkiZK5GQCOVYYIB4rq/BqXK6JLJb6fe61uuGVtOtYYZfKwq4lKyRS43ZwCFH3Dk9BWT40SVdXgaZ51ke3VjazhA9p8zDyiECqOm7AVfv9AaAOcooooAmtLWa+vYLS2TfPPIsUa5A3MxwBk+5rXfw2pimNprel3txDG0r20DSh9qgsxBeNUbABPDHgcZrO0lPN1mxj+1/Y91xGv2nOPJyw+fOR069e1eiaot9LpN+s2j6po8Yt5GOpT29tGlxhSdhdIEY78beJGyW7igDy+iiigAoorT0fSTqy36Rt/pENuJYY9yjzG8xFK89flZjgc/LQBRuLma6kEk7l3CJGCf7qKFUfgoA/Coq6nxL4dsdJtJzbi7jltbtbUtcSoy3WULGSNQAVAwvBLf6xeQeK5agAooooAKKKKALOnTW9vqdrNdwefbRzI80OceYgILL+IyK6P+2tDuri7N/ZxF0t51tLm2so4BIzRMqCSFTtGGIIZeQRzu4IKKAOTrT0O80yyup5NU01L+JoHWKN2cBJeCrHY6EjjB+boxPJAFFFABqOqWd7brHb6Dp1g4fcZbZ7hmIwflPmSuMc56Z4HPWsyiigAra0nUrCx0a+jubC1vJ5riDak6NkRhZd+11IKnJj6HnjIIFFFAFTUv7KbypdM+1x7s+ZBcbW8vpja4xuB56quMd6oUUUAFb3h5tDKNHrD+Xi8t5QfKZt8S7/MTK8ru3J27UUUAHiK+sb2C1MJtHu1kmEslpZi2jMWVEY2gAFuHOcZwwBJI4waKKALmlai2lanBfLbWt0YiT5N3CJYnBBBDKeD1/DqOa6PxF4stdU8P21lZW1tAXnlkni/s22jMQIi2hJI0U9UbJAUkEA5AySigDkKKKKALWmS2sGq2c19CZrSOdGniHV0DAsO3UZFdNLr+m3cV7Dc2ulJEttIga305Uku5vmEUi4UeUF+TKggEKeGJNFFAHH0UUUAFFFFABWhod1aWWrw3F9CJYFDggxCQKxUhW2MQG2sQ208HGD1oooA1JtW0a903U2nsY4tUeIRwS28CpFL+9Rt5QcRPsVh8vBBxgck83RRQAUUUUAFbem6ppFlo8kVzoNrf6h5+5ZrmSYL5RXBXEcqYIIyODncemBkooAz9RvIL24WS3021sECbTFbNKyk5PzHzHc55x1xwOOtVKKKACiiigAooooAK19G1DSrG1vhqGjw6hcOqG2MzyhEIPzAiORDgg9cnBUcck0UUAV9S1C2vvK+z6PZadszu+yvM3mZx18yR+mO2OpznjFCiigAooooAKKKKANPQLu1sdV+0XkUMsS284VJ4vMQyGFwmV/3yvPbrxin3k+i3lo80NrPYXoxiGJvNgk55xuO9MDJ5L59qKKAMmiiigAooooA0NGexTUf+JjxbNDMhbZv2u0TKjY9mKn8K3NUvdDGjTWVq9pMqW0AtjHZbJfPyPNdpSAxUgPwSR86gD5c0UUAcnRRRQB0Xh3xJDpVrLY3Wn2UsMr7xdNYQTzwnAHHmqQy8fd49iMnNbxVrA13xJfX0awrA80nk+VbRwZjLsVLBAMtg8k5PqTiiigDGooooA6LSNasNM0RkbT7O7u2mk3rc2yvuUoPLKucldjgkqMBg2DkdKviK6s7u7tntTbvKLcC6ktrcQRSS7mOVQBQBtKD7oyQTjnJKKAMeiiigCzp01vb6nazXcHn20cyPNDnHmICCy/iMiuj/ALa0O6uLs39nEXS3nW0ubayjgEjNEyoJIVO0YYghl5BHO7ggooA5OiiigArX0O/s9PTUpLq1trp3tQk
EVzEXUv5sZPIwVOwP8wIPvzRRQBFqD6PPbrNYRXVrcFsPbORJHjByyvww5wNpB6/eNZtFFABRRRQAUUUUAf/Z", - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAFoCAIAAAAElhK7AAAbC0lEQVR4Ae3dTaxdVdkAYG4vUOSvRQMUpI1JIRoSTUeYiDKhECKJRutPJCFGYiIJMSZqHDkyDMCBDgwTEhOVCQwalDAg0AjG3xAIN1ZhoBICJAoItJW/CqWujwN7n+909e5zetbad629nzvQdVbXfte7nnfv+3LvOZSTTvJFgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACB4gVWNjDD1dXVU0455eyzz77ooot27dp19dVXf/nLXw753HXXXffff//a2tqzzz576NChN99888iRIxuYp60JECBAYMACmwZ8NkcjQIAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqExgpbJ8E6W7srKyurq6efPmrVu3bt++fdeuXbt3796zZ08Iv3fv3n379q2trT3zzDMHDhw4fPjwkSNHjh49mmhnYQgQIECgLAF/xVpZ9ZANAQIECPQsoBH2DG47AgQIEChLQCMsqx6yIUCAAIGeBTTCnsFtR4AAAQJlCWiEZdVDNgQIECDQs4BG2DO47QgQIECgLAGNsKx6yIYAAQIEehbQCHsGtx0BAgQIlCWgEZZVD9kQIECAQM8CGmHP4LYjQIAAgbIENMKy6iEbAgQIEOhZQCPsGdx2BAgQIFCWgEZYVj1kQ4AAAQI9C2iEPYPbjgABAgTKEtAIy6qHbAgQIECgZwGNsGdw2xEgQIBAWQIaYVn1kA0BAgQI9CygEfYMbjsCBAgQKEtAIyyrHrIhQIAAgZ4FNMKewW1HgAABAmUJaIRl1UM2BAgQINCzgEbYM7jtCBAgQKAsAY2wrHrIhgABAgR6FtAIewa3HQEC
BAiUJaARllUP2RAgQIBAzwIaYc/gtiNAgACBsgQ0wrLqIRsCBAgQ6FlAI+wZ3HYECBAgUJaARlhWPWRDgAABAj0LaIQ9g9uOAAECBMoS0AjLqodsCBAgQKBnAY2wZ3DbESBAgEBZAhphWfWQDQECBAj0LKAR9gxuOwIECBAoS0AjLKsesiFAgACBngU0wp7BbUeAAAECZQlohGXVQzYECBAg0LOARtgzuO0IECBAoCwBjbCsesiGAAECBHoW0Ah7BrcdAQIECJQloBGWVQ/ZECBAgEDPAhphz+C2I0CAAIGyBDTCsuohGwIECBDoWUAj7BncdgQIECBQloBGWFY9ZEOAAAECPQtohD2D244AAQIEyhLQCMuqh2wIECBAoGcBjbBncNsRIECAQFkCGmFZ9ZANAQIECPQsoBH2DG47AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZBdYyb5DMRusrKxs2rTptNNO27Jly44dO3bt2rV79+49e/aEBPfu3btv3761tbWnn3764MGDb7zxxttvv3306NFicpcIAQIECOQS8Fes5ZIVlwABAgSqENAIqyiTJAkQIEAgl4BGmEtWXAIECBCoQkAjrKJMkiRAgACBXAIaYS5ZcQkQIECgCgGNsIoySZIAAQIEcglohLlkxSVAgACBKgQ0wirKJEkCBAgQyCWgEeaSFZcAAQIEqhDQCKsokyQJECBAIJeARphLVlwCBAgQqEJAI6yiTJIkQIAAgVwCGmEuWXEJECBAoAoBjbCKMkmSAAECBHIJaIS5ZMUlQIAAgSoENMIqyiRJAgQIEMgloBHmkhWXAAECBKoQ0AirKJMkCRAgQCCXgEaYS1ZcAgQIEKhCQCOsokySJECAAIFcAhphLllxCRAgQKAKAY2wijJJkgABAgRyCWiEuWTFJUCAAIEqBDTCKsokSQIECBDIJaAR5pIVlwABAgSqENAIqyiTJAkQIEAgl4BGmEtWXAIECBCoQkAjrKJMkiRAgACBXAIaYS5ZcQkQIECgCgGNsIoySZIAAQIEcglohLlkxSVAgACBKgQ0wirKJEkCBAgQyCWgEeaSFZcAAQIEqhDQCKsokyQJECBAIJeARphLVlwCBAgQqEJAI6yiTJIkQIAAgVwCGmEuWXEJECBAoAoBjbCKMkmSAAECBHIJaIS5ZMUlQIAAgSoENMIqyiRJAgQIEMgloBHmkhWXAAECBKoQ0AirKJMkCRAgQCCXgEaYS1ZcAgQIEKhCQCOsokySJECAAIFcAhphLllxCRAgQKAKAY2wijJJkgABAgRyCWiEuWTFJUCAAIEqBDTCKsokSQIECBDIJaAR5pIVlwABAgSqENAIqyiTJAkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQILCAwMoCawe0dGVlZXV1dfPmzVu3bt2+ffuuXbt27969Z8+ecMS9e/fu27dvbW3tmWeeOXDgwOHDh48cOXL06NEBnd5RCBAgQKAV8FestRZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIECLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIE
CLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIECLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEBi6wMnO+lZWVTZs2nXzyyZs3bz799NPDn7722muHDx9+66233n777aNHj86sP/ZliBAuP+WUU0477bQmwhtvvPHmm2+GIPNEODbmZGaS2Kmnnhoin3HGGWHy1VdfDZH/+9//TtI73oWd85OcQ9gtW7ZccMEFO3fuvPTSS8NVjz/++D/+8Y9//vOfBw8eDBvNn/+EMSAExve9731nnnlmiPbKK6+8/vrrATNQzIm5TuZBI3xNnGe2mGiHLcLXOhF6+KPgEL6a+yFU7ayzzgr7/uc//5nUrrkrlrkxjneQsHUgCruH+3BS2YsvvvhjH/vYZZddFi55+OGH//znP//973+f1Dfc5/Pf5MfbMdP85CCrq6vh5g+1DobhOO9///vDdi+99FK4OYNnuLXCg3DkyJFQ9ByYcx7tnbvy/27LyTeQs88+e+vWrR/4wAfC5S+++OKBAwcOHTo0+ZYyeQqy3qLBbYIWbryQxuTR/uhHPxpugMsvvzyk9Pvf/z7cBvv375885iG9cFtOGNc3nEQOt9Z0RcIxzz333BD2hRdeCIedrku4tUJp1o85LRwYQ+YTxpB8YDznnHNC/PPOOy8se/7550P8l19+OWCGhCffUialnw4yz7g5SPOdalKvsNG2bdtChH/961+T7QJO8x1s0eNMZ9J8Vwy38WSv888/P5Tmoosu2rFjR1j59NNPP/vss+GpfO655yb3zOT2nhxzOlTCsb9iLSGmUAQIECBQn4BGWF/NZEyAAAECCQU0woSYQhEgQIBAfQIaYX01kzEBAgQIJBTQCBNiCkWAAAEC9QlohPXVTMYECBAgkFBAI0yIKRQBAgQI1CegEdZXMxkTIECAQEIBjTAhplAECBAgUJ+ARlhfzWRMgAABAgkFNMKEmEIRIECAQH0CGmF9NZMxAQIECCQU0AgTYgpFgAABAvUJaIT11UzGBAgQIJBQQCNMiCkUAQIECNQnoBHWVzMZEyBAgEBCAY0wIaZQBAgQIFCfgEZYX81kTIAAAQIJBTTChJhCESBAgEB9AhphfTWTMQECBAgkFNAIE2IKRYAAAQL1CWiE9dVMxgQIECCQUEAjTIgpFAECBAjUJ6AR1lczGRMgQIBAQgGNMCGmUAQIECBQn4BGWF/NZEyAAAECCQU0woSYQhEgQIBAfQIaYX01kzEBAgQIJBTQCBNiCkWAAAEC9QlohPXVTMYECBAgkFBAI0yIKRQBAgQI1CegEdZXMxkTIECAQEIBjTAhplAECBAgUJ+ARlhfzWRMgAABAgkFNMKEmEIRIECAQH0CGmF9NZMxAQIECCQU0AgTYgpFgAABAvUJaIT11UzGBAgQIJBQQCNMiCkUAQIECNQnoBHWVzMZEyBAgEBCAY0wIaZQBAgQIFCfgEZYX81kTIAAAQIJBTTChJhCESBAgEB9AhphfTWTMQECBAgkFNAIE2IKRYAAAQL1CWiE9dVMxgQIECCQUEAjTIgpFAECBAjUJ6AR1lcz
GRMgQIBAQgGNMCGmUAQIECBQn4BGWF/NZEyAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQWETgc5/73CLLI2uXjxAJaooAAQIECPQj8NBDDy250fIRlkzA5QQIECBA4HgC3f89wt/97nfHu3jO+eUjzLmRZQQIECBAgAABAgQIECAwJoHcb0CmjZ82WrTOPWwR3XehyUKSLCSNhehmFg/gCDMn8pIAgYUFcr8BmTZ+2mhRrB62iO670GQhSRaSxkJ0M4sHcISZE3lJoH+B+HuEW7ZsueWWW/bv3//8O19hcOutt4bJ+fNbPsKce+V+AzJt/LTRokQ9bBHdd6HJQpIsJI2F6GYWD+AIMyfykkApAr/85S+/853vfPCDH9z0zlcYhJdhcv78lo8w/15WEiBAgACBxALRf8yMTh5v4+ji6OTxIpgnQGBgAqW9o5kpn0xhm5shd/xmozDoc6/pfXsex381+u9///vb3/72hRdeOPmJMAzCyzA5f3LLR5h/LysJEKhC4Fvf+lZReWbKJ1PYhi53/GajMOhzr+l9ex7HG+FXv/rVbdu23X///c+98xUG4WWYnD+55SPMv5eVBAhUIVDa74Qy5ZMpbFPi3PGbjcKgz72m9zUmQIAAAQIENlpg+c98Lh9hHYOswcO+aeOnjRZl6WGL6L4LTRaSZCFpLEQ3s3gAR5g5kZcEGoENeVcy/qvRn//85y+88MI111wTfiMavsIg/GsUYbLJtXOwfIR1tsgaPOybNn7aaFGWHraI7rvQZCFJFpLGQnQziwdwhJkTeUmgESjoXcno74Wjk032M4Po4ujkzIXzvIzGiU7OE+3YNdFQ0cljrz12JnphdPLYa+eciUaLTs4ZMMeyaD7RyRy7NzGjO0Ynm0tKG0SzjU6Wlrl8CHQK3HzzzZ1rki+I/0S4/Gc+l4+wzlGzBg/7po2fNlqUpYctovsuNFlIkoWksRDdzOIBHGHmRF4SaAS+//3vN+MNHoQ3IX74wx/+5S9/Cb8gDV9hEF6GyfnTWj7COntlDR72TRs/bbQoSw9bRPddaLKQJAtJYyG6mcUDOMLMibwkQIAAAQIECBAoTCD8I2fJf9fo8umt7502ftpo0cx72CK670KThSRZSBoL0c0srusIpWWbKZ9MYZvS547fbBQGfe41ve8GjuPvES7/sbTlI6yDkjV42Ddt/LTRoiw9bBHdd6HJQpIsJI2F6GYW13WE0rLNlE+msE3pc8dvNgqDPvea3re4cfQTaNHJ46UeXRydPF6EdeajcaKT6wRZ54+ioaKT6wRp/ih6YXSyuWTRQTRadHLRyAnXR/OJTibc9NhQ0R2jk8deW8hMNNvoZAkJRxOLTvaTbXTr6ORC+UQjRCcXCtssjoaKTjaXnPAgGjY6ecJblHZh/CfC5T+WtnyEdaSyBg/7po2fNlqUpYctovsuNFlIkoWksRDdzOK6jlBatpnyyRS2KX3u+M1GYdDnXtP7FjcOvyP2qdFlPjQ7XdHlMaejRcc9bBHdd6HJQpIsJI2F6GYW13WE0rLNlE+msE3pc8dvNgqDPvea3teYAAECBAgQKEkg/BOBT43u378//MVy4SsMbr311mByYiVaHrNz3x626Myhc0EhSTZ/b8XOnTt//etfh39NNvxvGHfmX86CQiTnBCkt20z5ZArbIOeO32wUBn3uNdm30Kdy+f++/PIRpgszM84aPOyVNn7aaDMUk5c9bBHdd6HJQpJ86KGHJmnfc889n//851dXVz/xiU/cd999C51lYxcXIjknQmnZZsonU9gGOXf8ZqMw6HOvyb6FPpXRDwhFJ6f5psfRxdHJ6avmHEfjRCfnDDizLBoqOjlzYfRl9MLoZPTyeSaj0aKT80TLtCaaT3QyUwKTsM0j
N711M5l161TBpzNvYkYnmz/dwEE0sehkP0lGt45OLpRPNEJ0cqGwzeJoqOhkc8kJD6Jho5MnvMXMhc0DOL1LMzmzOMdLnxqNqKb90FTaaJF0K/mUVw8OUZyZyR07dtx4442f/exnH3/88Ztuuin8RBjGJ5988syykl8WIjknUWnZZsonU9gGOXf8ZqMw6HOvyb4b/lTGG+Hy/3355SNMF2ZmnDV42Ctt/LTRZigmL3vYIrrvQpOFJPmNb3zj1Vdf/fCHPxz+N7w1uH379q985Stf+9rXFjrLxi4uRHJOhNKyzZRPprANcu74zUZh0Odek30H8FROAxp3C/ziF7/oXmQFAQI1C3jMa67ee7lfffXVTzzxRPjA5E9/+tNzzz13Mn3vvfe+9+cb/P/hJ/cf/OAH5513XqY80n6E6fXXXz906NCB977eeuutyTBh8tdee+1TTz31t7/97corrwxP4IsvvviHP/zhIx/5SMItlg9V+E21/AFFiArkflqjm64zmfbpbjbK/Zj3+Yz3/6iWdpO8W9a1tbXwbfSMM84IPyOHf6/84x//ePiDhd66PHjw4Hvf+f/f/zf3zTKD8F3+61//+mOPPXb77beH33EtEyp6bXPSJB8s/PSnPx0S/tKXvjTZqwke3frEJh999NELLrhg9+7d4dd94R2v8L7Xpz71qQceeODEomW6avmbKkliWe/MJBl2BqnrCLmf1k6umQXNA5jk6W6C537M+3zG+39US7tJ3i3rb3/7202b3n37MLyP8sgjj4T3UZobqKn9OoOf/exn3/zmN9dZsMwfTTJZWVn5zGc+85vf/OZXv/rVFVdcsUzAmWubk6b6CNNZZ51122233X333RdeeGETfGbTZV42Me+4444mTpBpxiUMlr+pkpwi652ZJMPOIHUdIffT2sk1s6B5WFI93U38rI95k3YPz3j/j2ppN8m7Nb3hhhvCt9EvfvGLk9dnnnnm3r17X3nllabknYOtW7cGzW3btnWuPIEFzT0xuTb8wBrS++Mf/3gCoaKXPPnkk5MPFoafOJsPFk4/NtGrOic/+clP/ulPf/rrX//auXLRBXfdddd3v/vdU089dXJh+AzkF77whQcffHDROFnXL39TJUkv652ZJMPOIHUdIffT2sk1syDT093skukx7/MZ7/9RLe0maap5UnhrcPpNpvDjVzmfrLvzzjvbRN8bXXzxxe8Nl/3/q6666vrrr//e97734x//+Ec/+tGHPvShsOMll1yybNyTTtq8efN11123fJyZCKeffnrI9pxzzpnM/+QnPwnvFIZPJM8s2/CXJd9UG44z1ARyP62LuuV7uptMcjzmPT/jPT+qpd0kTSkNCBAgQIAAAQIECBAgQGDoAv8DNIwUyHDTngMAAAAASUVORK5CYII=", - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Making a gel to show the PCR products\n", - "im = gel(\n", - " [\n", - " GeneRuler_1kb_plus,\n", - " [pcr_product_F1],\n", - " [pcr_product_F2],\n", - " [pcr_product_F3],\n", - " [pcr_product_BAC],\n", - " ]\n", - ")\n", - "im.rotate(90, expand=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: 
True\n", - "size: 23827\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 28\n", - "/molecule_type=DNA\n", - "Dseq(o23827)\n", - "GCAG..ccgc\n", - "CGTC..ggcg\n" - ] - } - ], - "source": [ - "# Performing the Gibson Assembly. Note that the assembly class parameters should be given as a list.\n", - "\n", - "assembled = Assembly([Dseqrecord(pcr_product_F1), Dseqrecord(pcr_product_F2), Dseqrecord(pcr_product_F3), Dseqrecord(pcr_product_BAC)])\n", - "assembled_circ = assembled.assemble_circular()\n", - "\n", - "# Printing out the Gibson Assembly product\n", - "print(assembled_circ[0])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example of a Gibson Assembly in pydna\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "This example showcases a workflow of modelling Gibson assembly to clone gene fragments into plasmids for synthetic biology. The biological example is sourced [here](https://www.nature.com/articles/nmeth.1318#MOESM319), from the original Gibson assembly paper. This example constructs a synthetic pCC1BAC plasmid by joining sequence fragments from Ruminiclostridium (Clostridium) cellulolyticum. The R. cellulolyticum fragments joined are termed F1, F2, and F3, as in the paper.\n", + "\n", + "Source files can be found alongside this notebook, if you would like to follow along. 
Annotations are made alongside the code to describe key steps.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing all necessary classes and methods\n", + "\n", + "from pydna.parsers import parse\n", + "from pydna.tm import tm_default\n", + "from pydna.amplify import pcr\n", + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.assembly import Assembly\n", + "from pydna.genbank import Genbank\n", + "from pydna.gel import gel\n", + "from pydna.ladders import GeneRuler_1kb_plus\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'molecule_type': 'DNA',\n", + " 'topology': 'circular',\n", + " 'data_file_division': 'BCT',\n", + " 'date': '25-AUG-2017',\n", + " 'accessions': ['CP001348', 'AAVC01000000', 'AAVC01000001-AAVC01000121'],\n", + " 'sequence_version': 1,\n", + " 'keywords': [''],\n", + " 'source': 'Ruminiclostridium cellulolyticum H10',\n", + " 'organism': 'Ruminiclostridium cellulolyticum H10',\n", + " 'taxonomy': ['Bacteria',\n", + " 'Bacillota',\n", + " 'Clostridia',\n", + " 'Eubacteriales',\n", + " 'Oscillospiraceae',\n", + " 'Ruminiclostridium'],\n", + " 'references': [Reference(title='Complete sequence of Clostridium cellulolyticum H10', ...),\n", + " Reference(title='Direct Submission', ...)],\n", + " 
'comment': 'URL -- http://www.jgi.doe.gov\\nJGI Project ID: 4002584\\nSource DNA and bacteria available from Jizhong Zhou\\n(jzhou@rccc.ou.edu)\\nContacts: Jizhong Zhou (jzhou@rccc.ou.edu)\\n David Bruce (microbe@cuba.jgi-psf.org)\\nAnnotation done by JGI-ORNL and JGI-PGF\\nFinishing done by JGI-LANL\\nFinished microbial genomes have been curated to close all gaps with\\ngreater than 98% coverage of at least two independent clones. Each\\nbase pair has a minimum q (quality) value of 30 and the total error\\nrate is less than one per 50000.\\nThe JGI and collaborators endorse the principles for the\\ndistribution and use of large scale sequencing data adopted by the\\nlarger genome sequencing community and urge users of this data to\\nfollow them. it is our intention to publish the work of this\\nproject in a timely fashion and we welcome collaborative\\ninteraction on the project and analysis.\\n(http://www.genome.gov/page.cfm?pageID=10506376).'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reading the R. 
cellulolyticum genome from GenBank\n", + "gb = Genbank(\"example@example.com\")\n", + "genome = gb.nucleotide(\"CP001348.1\")\n", + "# Print the info of the genome\n", + "genome.annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'molecule_type': 'DNA',\n", + " 'topology': 'circular',\n", + " 'data_file_division': 'SYN',\n", + " 'date': '29-AUG-2024',\n", + " 'accessions': ['.'],\n", + " 'keywords': [''],\n", + " 'source': 'synthetic DNA construct',\n", + " 'organism': 'synthetic DNA construct',\n", + " 'taxonomy': [],\n", + " 'references': [Reference(title='Direct Submission', ...),\n", + " Reference(title='Direct Submission', ...),\n", + " Reference(title='Direct Submission', ...),\n", + " Reference(title='Direct Submission', ...)],\n", + " 'comment': 'SGRef: number: 1; type: \"Journal Article\"; journalName: \"Submitted\\n(23-AUG-2007) 726 Post Road, Madison, WI 53713, USA\"\\nSGRef: number: 2; type: \"Journal Article\"\\nSGRef: number: 3; type: \"Journal Article\"'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reading the plasmid\n", + "vector = parse(\"./pCC1BAC.gb\")[0]\n", + "vector.annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing pre-designed primers for the PylRS insert fragment. 
\n", + "\n", + "F1_For = \"GCAGCTTCAAGTCCTGCAAACAAGGTGTACCAGGATCGTT\"\n", + "F1_Rev = \"GATTTCAGTGTAGTTAGGGCCAGTTGAATTCAAACCTGCC\"\n", + "F2_For = \"GGCAGGTTTGAATTCAACTGGCCCTAACTACACTGAAATC\"\n", + "F2_Rev = \"CTTGGTGCCATCAGCATTGTTCTCTGTACCGCCCACTGTC\"\n", + "F3_For = \"GACAGTGGGCGGTACAGAGAACAATGCTGATGGCACCAAG\"\n", + "F3_Rev = \"CAGTTGAATAATCATGTGTTCCTGCGGCAAATGCAGTACC\"\n", + "BACF1_For = \"AACGATCCTGGTACACCTTGTTTGCAGGACTTGAAGCTGCgcggccgcgatcctctagagtcgacctg\"\n", + "BACF3_Rev = \"GGTACTGCATTTGCCGCAGGAACACATGATTATTCAACTGgcggccgccgggtaccgagctcgaattc\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5210\n", + "5384\n", + "5172\n", + "8221\n" + ] + } + ], + "source": [ + "# Getting the PCR products from the genome (might take a while since the genome is large)\n", + "\n", + "pcr_product_F1 = pcr(F1_For, F1_Rev, genome, limit=20)\n", + "pcr_product_F2 = pcr(F2_For, F2_Rev, genome, limit=20)\n", + "pcr_product_F3 = pcr(F3_For, F3_Rev, genome, limit=20)\n", + "pcr_product_BAC = pcr(BACF1_For, BACF3_Rev, vector, limit=20)\n", + "\n", + "# Printing out the PCR fragment sizes\n", + "print(len(pcr_product_F1))\n", + "print(len(pcr_product_F2))\n", + "print(len(pcr_product_F3))\n", + "print(len(pcr_product_BAC))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/jpeg": 
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAFoAlgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD5/ooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK6/wCH3hWx8WeIrfT7+W4jhlcKWgZQ36g1yFel/Bf/AJHey/66D+dAGH8QfCtj4T8RXGn2EtxJDE5UNOylv0ArkK9L+NH/ACO97/10P8680oAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK9L+C/wDyO9l/10H8680r0v4L/wDI72X/AF0H86AD40f8jve/9dD/ADrzSvS/jR/yO97/ANdD/OvNKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACvS/gv/wAjvZf9dB/OvNK9L+C//I72X/XQfzoAPjR/yO97/wBdD/OvNK9L+NH/ACO97/10P8680oAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK7n4Y+INL8PeKbW91W6+z26OCz+Wz4H0UE1w1FAHc/E7xBpfiHxTdXulXX2i3dyVfy2TI+jAGuGoooAKKKKACinxQyzuEijeRz0VFJJq7faDrOmI
j6hpN/aJINyNcWzxhh6jIGaAM+iiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8AIat/98V7X8b/APkCaN/16JXinhj/AJDVv/viva/jf/yBNG/69EoA+fD1NJSnqaSgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8hq3/3xXtfxv/5Amjf9eiV4p4Y/5DVv/viva/jf/wAgTRv+vRKAPnw9TSUp6mkoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooA2fDH/Iat/8AfFe1/G//AJAmjf8AXoleKeGP+Q1b/wC+K9r+N/8AyBNG/wCvRKAPnw9TSUp6mkoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAoooHWgCa1u57OdZrd9kinIOAf51t65448R+JIIYdW1H7RHAgSMeRGm1R2+VRXY/Dn4baN4v07ULjULm/ie2gaRBbyIoJHrlTXnusWMWn6jLbxM7IjEAuQTQBn0UUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUDrRQOtAH0H8EP8AkB6z/wBej14r4n/5Ddx/vmvavgh/yA9Z/wCvR68V8T/8hu4/3zQBjUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUDrRQOtAH0H8EP+QHrP/Xo9eK+J/wDkN3H++a9q+CH/ACA9Z/69HrxXxP8A8hu4/wB80AY1FFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFA60UDrQB9B/BD/kB6z/16PXivif/AJDdx/vmvSvhZ448OeG9K1OHVtR+zyT27JGPIkfcx7fKpry/XbuC81Saa3ffGzEg4I/nQBmUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFF
FABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAPihlncJFG8jnoqKSTV2+0HWdMRH1DSb+0SQbka4tnjDD1GQM1Z8Mf8hq3/3xXtfxv/5Amjf9eiUAfPdFKeppKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKANnwx/yGrf/AHxXtfxv/wCQJo3/AF6JXinhj/kNW/8Aviva/jf/AMgTRv8Ar0SgD58PU0lKeppKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKANnwx/yGrf8A3xXtfxv/AOQJo3/XoleKeGP+Q1b/AO+K9r+N/wDyBNG/69EoA+fD1NJSnqaSgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8AIat/98V7X8b/APkCaN/16JXgNrdz2c6zW77JFOQcA/zrb1zxx4j8SQQw6tqP2iOBAkY8iNNqjt8qigDnz1NJRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAKBk4qaeFYgpUnkZ5qFfvCrV391PpQBUqWCMSyBWJx7VFViz/wBePrQAyeMRSFVJx71FVi8/15+tV6ACrVtbJMrFiwwM8VVq/Y/6t/8AdoApONrECm06T/WGm0AFdV4b8NWesW1xJcSzqY0LDy2A5/EGuVr0PwN/x43v/XE0AcJeQLb3LxoSQDjmq9XNU/4/pP8AeNU6AJ7aFZpQrEgH0ouYVhlKqSQPWpLD/Xr9aL//AF7fWgCpRRRQBLBGJJApJx7VLeWyW74QsfrTLT/XD61a1P8A1n4UAZ1FFFABRRRQAVLbxLLIFYkA+lRVZs/9ev1oAfe2kds+ELH6mqdaeq/638KzKACiiigB0ahnAPevR9C+Huk6n4XutTmuL1Z4QCqo6BT9crn9a85h/wBav1r3Lwj/AMk/1H/dFAHid9bpbXLxoWIBxzVar+rf8f0n1qhQAUUUUAFFFFABWppemw3r7ZWkA/2SP8Ky66Dw/wD638KAMm9tktpiiFiB61VrQ1T/AI+W+tZ9ABRRRQAUUUUAaWl6fFezKkjOAT/CR/hRqmnxWUzJGzkA/wARH+FWvD//AB9J9aPEH/H0/wBaAMOiiigAooooA6zwZ4XsvEWpxW13LcIjsATCyg/qDXT/ABJ+GujeDrjy9Pub+UbQf9IkRu3soqn8Lv8AkPW/+8K7746f8fo/3B/KgD57YYYihRlgKV/vmhPvCgDodK0C1voHeWSYFVyNrD/Cse9tUtpWRCxA9a63w7/x6Tf7hrmNV/4+W+tAGdSqMsAaSnR/fFA
HS6R4cs7+3eSWScFVyNjAf0rF1C0jtZmRCxA/vGuy8Nf8eUv+4a5TWf8Aj6f60AZdFFFAGhplhFeTKkjOAT/CRW/rXhWx062jkhluGLLk72U/yFZWg/8AH0n1rsfFP/HjD/uCgDzaRQrkCm0+b/WGmUAKo3MAe9dt4V8Hafrk6JczXSBjz5TKP5qa4qP/AFi/WvVvh5/x9RfUUAc94q8Hafoc7pbTXThTx5rKf5KK4lhtYgdq9X+If/H1L9TXlMn+sb60ANooooAKKKKAFX7wq1d/dT6VVX7wq1d/dT6UAVKsWf8Arx9ar1Ys/wDXj60AF5/rz9ar1YvP9efrVegAq/Y/6t/92qFX7H/Vv/u0AUpP9YabTpP9YabQAV6H4G/48b3/AK4mvPK9D8Df8eN7/wBcTQBxGqf8f0n+8ap1c1T/AI/pP941ToAt2H+vX60X/wDr2+tFh/r1+tF//r2+tAFSiiigCe0/1w+tWtT/ANZ+FVbT/XD61a1P/WfhQBnUUUUAFFFFABVmz/16/Wq1WbP/AF6/WgC1qv8ArfwrMrT1X/W/hWZQAUUUUAPh/wBav1r3Lwj/AMk/1H/dFeGw/wCtX617l4R/5J/qP+6KAPGdW/4/pPrVCr+rf8f0n1qhQAUUUUAFFFFABXQeH/8AW/hXP10Hh/8A1v4UAZ+qf8fLfWs+tDVP+PlvrWfQAUUUUAFFFFAG54f/AOPpPrR4g/4+n+tHh/8A4+k+tHiD/j6f60AYdFFFABRRRQB6P8Lv+Q9b/wC8K7746f8AH6P9wfyrgfhd/wAh63/3hXffHT/j9H+4P5UAfPj/AHzQn3hQ/wB80J94UAdt4d/49Jv9w1zGq/8AHy31rp/Dv/HpN/uGuY1X/j5b60AZ1Oj++KbTo/vigDvfDX/HlL/uGuU1n/j6f611fhr/AI8pf9w1yms/8fT/AFoAy6KKKANrQf8Aj6T612Pin/jxh/3BXHaD/wAfSfWux8U/8eMP+4KAPN5v9YaZT5v9YaZQA6P/AFi/WvVvh5/x9RfUV5TH/rF+terfDz/j6i+ooAPiH/x9S/U15TJ/rG+terfEP/j6l+prymT/AFjfWgBtFFFABRRRQAq/eFWrv7qfSqq/eFWrv7qfSgCpViz/ANePrVerFn/rx9aAC8/15+tV6sXn+vP1qvQAVfsf9W/+7VCr9j/q3/3aAKUn+sNNp0n+sNNoAK9D8Df8eN7/ANcTXnleh+Bv+PG9/wCuJoA4jVP+P6T/AHjVOrmqf8f0n+8ap0AW7D/Xr9aL/wD17fWiw/16/Wi//wBe31oAqUUUUAT2n+uH1q1qf+s/Cqtp/rh9atan/rPwoAzqKKKACiiigAqzZ/69frVarNn/AK9frQBa1X/W/hWZWnqv+t/CsygAooooAfD/AK1frXuXhH/kn+o/7orw2H/Wr9a9y8I/8k/1H/dFAHjOrf8AH9J9aoVf1b/j+k+tUKACiiigAooooAK6Dw//AK38K5+ug8P/AOt/CgDP1T/j5b61n1oap/x8t9az6ACiiigAooooA3PD/wDx9J9aPEH/AB9P9aPD/wDx9J9aPEH/AB9P9aAMOiiigAooooA9H+F3/Iet/wDeFd98dP8Aj9H+4P5VwPwu/wCQ9b/7wrvvjp/x+j/cH8qAPnx/vmhPvCh/vmhPvCgDtvDv/HpN/uGuY1X/AI+W+tdP4d/49Jv9w1zGq/8AHy31oAzqdH98U2nR/fFAHe+Gv+PKX/cNcprP/H0/1rq/DX/HlL/uGuU1n/j6f60AZdFFFAG1oP8Ax9J9a7HxT/x4w/7grjtB/wCPpPrXY+Kf+PGH/cFAHm83+sNMp83+sNMoAdH/AKxfrXq3w8/4+ovqK8pj/wBYv1r1b4ef8fUX1FAB8Q/+PqX6mvKZP9Y31r1b4h/8fUv1NeUyf6xvrQA2iiigAooooAVThhU9xKjqu05wPSq9FABU1s6xyhmOBUNFAE1y6ySllORUNFFABVu1njj
Rg7YJHpVSigBzkFyR0ptFFABXZeFNd03TbS6S7ufLZ4yqjYxyfwFcbRQBZv5UmuneNsqTwcVWoooAsWkqRTBnOB9KLuVJZiyHI+lV6KACiiigCW3dUlBY4FWL6eOZ8xtkfQ1SooAKKKKACiiigAqa2kWOUFjgVDRQBf1C4inkzG+4fQiqFFFABRRRQA+NgrgnpmvVfDvjLQLDwfeWFzf7LmRQETyZDn8QuK8nooAt6hNHPdO8bblJ4OMVUoooAKKKKACiiigArY0e+trWTM0m0Y/uk/yrHooAuX88c87NG24E+mKp0UUAFFFFABRRRQBq6PeW9rcK00m1QeuCf5UaxeW91cM0Mm5SeuCP51lUUAFFFFABRRRQB2vgLXtM0XV4Z9QufJjVgS3ls38ga6/4r+OfDniW6D6RqP2ldoGfIkTt/tKK8booAViCxIoU4YE0lFAHVaLrFhaW8iTz7GK4A2Mf5CsLUJ4p52aNtwJ9CKpUUAFOQgMCabRQB1+h63p1nbSJPcbGKkAbGP8AIVz+p3MVxOzRPuBPoRVCigAooooA1NJu4LWdWmfaAeuCf5V0uv8AiDS720iS3ut7KgBHlsP5iuGooAfIwZyR0plFFADkIDgnpXoHg3xNpGk3EbXt35Sg8ny3b+QNee0UAeheMvE2katcSNZXfmqTwfLdf5gV5+5BckdKbRQAUUUUAFFFFABUzJbiyidZWNyZHEke3hUAXac9ySX4/wBketP057WPU7R76J5bRZkM8afeePcNwHI5IzXoGraZcWWi3V3qttYvpzwuLdbfw/JayLIVIjPmmFAAG2k5dsgEc5oA82oore8JJBNq7wPAstzLCVtDJatcokuVOWjUEsNocfdbkg4OKAMe6S3SVRbStJH5cZLMuCHKAuPoG3DPfFQ12njGyk07ToYNUgtv7TeYNFLa6S9ioiAbcGDRRbiSUx8pxg881xdABRRVmx06+1S4+z6fZXF3PjPl28TSNj1wATQBXZWRyjqVZTggjBBpKvaxb6pb6nM2sW1zb3szGWRbmIxuxYkk4IHU5qjQAUUVZsdOvtUuPs+n2Vxdz4z5dvE0jY9cAE0AV2Vkco6lWU4IIwQaSr2sW+qW+pzNrFtc297MxlkW5iMbsWJJOCB1Oao0AFKVZQpKkBhlSR1GccfkaWOOSaVYokZ5HIVVUZLE9gK09V0rXrC2tTq2mX9rBGnlQG5tmjGCzPgEgZ5Zj+NAGVRRRQAUu1ggcqdpJAOOCR1/mPzpOprXvNG8Q2ekRNe6VqMGno7SJJNaukYZwoJ3EY5Cr37UAZFFFFABRRRQAUUU6OOSaVYokZ5HIVVUZLE9gKAEKsoUlSAwypI6jOOPyNJWrqula9YW1qdW0y/tYI08qA3Ns0YwWZ8AkDPLMfxrKoAKKKKAClVWdwiKWZjgADJJpK0tFsNZu75J9Esby5ubZ1lU2sDSmNgcg4APcd6AM2iprq0uLK5e3u7eW3nQ4eKVCjL9QeRUNABRRRQAUUUUAFFFWbHTr7VLj7Pp9lcXc+M+XbxNI2PXABNAFdlZHKOpVlOCCMEGkq9rFvqlvqczaxbXNvezMZZFuYjG7FiSTggdTmqNABRRRQAUUUUAFFFTTpbrFbGGVnkaMmZSuAj72AA9RtCnPufSgCGiiigAooooAKKKmnS3WK2MMrPI0ZMylcBH3sAB6jaFOfc+lAENFFFABRU1mlvJewJdytFbNIolkVdxRM8kDuQM8VDQAUUUUAFFTWaW8l7Al3K0Vs0iiWRV3FEzyQO5AzxUNABRRRQAUUVMyW4sonWVjcmRxJHt4VAF2nPckl+P9ketAENFFFABRRRQAUUrKyHDKVOAcEY4IyKSgAooooAKKKKAL+iPdR6/pz2MSy3i3URgjf7rybhtB5HBOO9dlJHpMEGoz6JcQ3OqS2swniOpyygRlG8wqGt4w5C7iP3jdM84ritJTzdZsY/tf2PdcRr9pzjycsPnzkdOvXtXomqLfS6
TfrNo+qaPGLeRjqU9vbRpcYUnYXSBGO/G3iRslu4oA8vroPCTMLy+RxGLKS0K3kjzmHy4t6chlDEHdsGArZzjBzXP10fgtJW1edoXnaRLdmFrAEL3fzKPKAcMp67sFW+50JoAueIEsbfw35GhyR3GmG7Rp5ftjzOkux9gw8MW0Eb+Qpzt68VyFdz4yS5bRIpLjT73Rdtwqrp11DDF5uVbMoWOKLO3GCSp++MHqK4agArqPCohk07U4NRaOLSXeHz5muWhZZAH2KCschbI3nGw/dzxiuXrrvBCT7dQkt4bq/dfLU6ZbRxSNcA7vnKyJICEx/cY/OOnNAEPioQx6dpkGnNHLpKPN5Ey3LTM0hCb1JaOMrgbDjYPvZ5zXL113jdJ9mnyXEN1YO3mKNMuY4o2twNvzhY0jAD5/uKfkPXiuRoAK6jwqIZNO1ODUWji0l3h8+ZrloWWQB9igrHIWyN5xsP3c8Yrl667wQk+3UJLeG6v3Xy1OmW0cUjXAO75ysiSAhMf3GPzjpzQBD4qEMenaZBpzRy6SjzeRMty0zNIQm9SWjjK4Gw42D72ec1y9dd43SfZp8lxDdWDt5ijTLmOKNrcDb84WNIwA+f7in5D14rkaANjwtJcR+IYGtkjd9kofzJDGojMbCRtw5XCbjkdMdD0rf1KLS7Xw9qa+Hp4rlJUT7bm+klaNBIhDKrQQj7+wZ+bGe2c1geFkaTxHaKl09s43srxlQzEIx2Ddxl8bOePm5z0rrPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/8AiHynpwaAPO6KKKAL+iPdR6/pz2MSy3i3URgjf7rybhtB5HBOO9dlJHpMEGoz6JcQ3OqS2swniOpyygRlG8wqGt4w5C7iP3jdM84ritJTzdZsY/tf2PdcRr9pzjycsPnzkdOvXtXomqLfS6TfrNo+qaPGLeRjqU9vbRpcYUnYXSBGO/G3iRslu4oA8vooooAKKKKACtjwtJcR+IYGtkjd9kofzJDGojMbCRtw5XCbjkdMdD0rHrY8LI0niO0VLp7ZxvZXjKhmIRjsG7jL42c8fNznpQBv6lFpdr4e1NfD08VykqJ9tzfSStGgkQhlVoIR9/YM/NjPbOa4ivRPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/+IfKenBrzugAooooAK6/w+ljceG/I1ySO30wXbtBL9seF3l2JvGEhl3ADZyVGN3XmuQruPB1rey6fAbeVJrZ7m4NzayQQThdkSMhVJVYKzklN2McD0oAx/FrMbyxRBGbKO0C2ciTmbzIt78lmVSTu3jBVcYxgYrn66Hxe5m1Czud8wFxaJKtvMULWylmAT5FVQCAHACrw447nnqACiiigAooooAK6jwqIZNO1ODUWji0l3h8+ZrloWWQB9igrHIWyN5xsP3c8Yrl667wQk+3UJLeG6v3Xy1OmW0cUjXAO75ysiSAhMf3GPzjpzQBD4qEMenaZBpzRy6SjzeRMty0zNIQm9SWjjK4Gw42D72ec1y9dd43SfZp8lxDdWDt5ijTLmOKNrcDb84WNIwA+f7in5D14rkaACiiigAooooAtadYTanepawFFdgzFpG2qiqpZmJ9AoJ/Cr91oCxWU91Z6vp2opbgNOtt5qtGpYKGxIiZG5lHy56ik8LI0niO0VLp7ZxvZXjKhmIRjsG7jL42c8fNznpXWeJVvJPDl41xpGo6CibGCXNvbwreneo2/u4IixGd/8Q+U9ODQB53RRRQAUUUUAWtOsJtTvUtYCiuwZi0jbVRVUszE+gUE/hV+60BYrKe6s9X07UUtwGnW281WjUsFDYkRMjcyj5c9RSeFkaTxHaKl09s43srxlQzEIx2Ddxl8bOePm5z0rrPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/8AiHynpwaAPO6KKKANXT9E+2WX2251Ky0+1MhiSS6Mh8x1ALALGjNwGXJxjkc1BqemSaX
NErTQXEU0fmwzwMSkiZK5GQCOVYYIB4rq/BqXK6JLJb6fe61uuGVtOtYYZfKwq4lKyRS43ZwCFH3Dk9BWT40SVdXgaZ51ke3VjazhA9p8zDyiECqOm7AVfv8AQGgDnKKKKANXT9E+2WX2251Ky0+1MhiSS6Mh8x1ALALGjNwGXJxjkc1BqemSaXNErTQXEU0fmwzwMSkiZK5GQCOVYYIB4rq/BqXK6JLJb6fe61uuGVtOtYYZfKwq4lKyRS43ZwCFH3Dk9BWT40SVdXgaZ51ke3VjazhA9p8zDyiECqOm7AVfv9AaAOcooooAmtLWa+vYLS2TfPPIsUa5A3MxwBk+5rXfw2pimNprel3txDG0r20DSh9qgsxBeNUbABPDHgcZrO0lPN1mxj+1/Y91xGv2nOPJyw+fOR069e1eiaot9LpN+s2j6po8Yt5GOpT29tGlxhSdhdIEY78beJGyW7igDy+iiigAoorT0fSTqy36Rt/pENuJYY9yjzG8xFK89flZjgc/LQBRuLma6kEk7l3CJGCf7qKFUfgoA/Coq6nxL4dsdJtJzbi7jltbtbUtcSoy3WULGSNQAVAwvBLf6xeQeK5agAooooAKKKKALOnTW9vqdrNdwefbRzI80OceYgILL+IyK6P+2tDuri7N/ZxF0t51tLm2so4BIzRMqCSFTtGGIIZeQRzu4IKKAOTrT0O80yyup5NU01L+JoHWKN2cBJeCrHY6EjjB+boxPJAFFFABqOqWd7brHb6Dp1g4fcZbZ7hmIwflPmSuMc56Z4HPWsyiigAra0nUrCx0a+jubC1vJ5riDak6NkRhZd+11IKnJj6HnjIIFFFAFTUv7KbypdM+1x7s+ZBcbW8vpja4xuB56quMd6oUUUAFb3h5tDKNHrD+Xi8t5QfKZt8S7/MTK8ru3J27UUUAHiK+sb2C1MJtHu1kmEslpZi2jMWVEY2gAFuHOcZwwBJI4waKKALmlai2lanBfLbWt0YiT5N3CJYnBBBDKeD1/DqOa6PxF4stdU8P21lZW1tAXnlkni/s22jMQIi2hJI0U9UbJAUkEA5AySigDkKKKKALWmS2sGq2c19CZrSOdGniHV0DAsO3UZFdNLr+m3cV7Dc2ulJEttIga305Uku5vmEUi4UeUF+TKggEKeGJNFFAHH0UUUAFFFFABWhod1aWWrw3F9CJYFDggxCQKxUhW2MQG2sQ208HGD1oooA1JtW0a903U2nsY4tUeIRwS28CpFL+9Rt5QcRPsVh8vBBxgck83RRQAUUUUAFbem6ppFlo8kVzoNrf6h5+5ZrmSYL5RXBXEcqYIIyODncemBkooAz9RvIL24WS3021sECbTFbNKyk5PzHzHc55x1xwOOtVKKKACiiigAooooAK19G1DSrG1vhqGjw6hcOqG2MzyhEIPzAiORDgg9cnBUcck0UUAV9S1C2vvK+z6PZadszu+yvM3mZx18yR+mO2OpznjFCiigAooooAKKKKANPQLu1sdV+0XkUMsS284VJ4vMQyGFwmV/3yvPbrxin3k+i3lo80NrPYXoxiGJvNgk55xuO9MDJ5L59qKKAMmiiigAooooA0NGexTUf+JjxbNDMhbZv2u0TKjY9mKn8K3NUvdDGjTWVq9pMqW0AtjHZbJfPyPNdpSAxUgPwSR86gD5c0UUAcnRRRQB0Xh3xJDpVrLY3Wn2UsMr7xdNYQTzwnAHHmqQy8fd49iMnNbxVrA13xJfX0awrA80nk+VbRwZjLsVLBAMtg8k5PqTiiigDGooooA6LSNasNM0RkbT7O7u2mk3rc2yvuUoPLKucldjgkqMBg2DkdKviK6s7u7tntTbvKLcC6ktrcQRSS7mOVQBQBtKD7oyQTjnJKKAMeiiigCzp01vb6nazXcHn20cyPNDnHmICCy/iMiuj/ALa0O6uLs39nEXS3nW0ubayjgEjNEyoJIVO0YYghl5BHO7ggooA5OiiigArX0O/s9PTUpLq1trp3tQk
EVzEXUv5sZPIwVOwP8wIPvzRRQBFqD6PPbrNYRXVrcFsPbORJHjByyvww5wNpB6/eNZtFFABRRRQAUUUUAf/Z", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAFoCAIAAAAElhK7AAAbC0lEQVR4Ae3dTaxdVdkAYG4vUOSvRQMUpI1JIRoSTUeYiDKhECKJRutPJCFGYiIJMSZqHDkyDMCBDgwTEhOVCQwalDAg0AjG3xAIN1ZhoBICJAoItJW/CqWujwN7n+909e5zetbad629nzvQdVbXfte7nnfv+3LvOZSTTvJFgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACB4gVWNjDD1dXVU0455eyzz77ooot27dp19dVXf/nLXw753HXXXffff//a2tqzzz576NChN99888iRIxuYp60JECBAYMACmwZ8NkcjQIAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqExgpbJ8E6W7srKyurq6efPmrVu3bt++fdeuXbt3796zZ08Iv3fv3n379q2trT3zzDMHDhw4fPjwkSNHjh49mmhnYQgQIECgLAF/xVpZ9ZANAQIECPQsoBH2DG47AgQIEChLQCMsqx6yIUCAAIGeBTTCnsFtR4AAAQJlCWiEZdVDNgQIECDQs4BG2DO47QgQIECgLAGNsKx6yIYAAQIEehbQCHsGtx0BAgQIlCWgEZZVD9kQIECAQM8CGmHP4LYjQIAAgbIENMKy6iEbAgQIEOhZQCPsGdx2BAgQIFCWgEZYVj1kQ4AAAQI9C2iEPYPbjgABAgTKEtAIy6qHbAgQIECgZwGNsGdw2xEgQIBAWQIaYVn1kA0BAgQI9CygEfYMbjsCBAgQKEtAIyyrHrIhQIAAgZ4FNMKewW1HgAABAmUJaIRl1UM2BAgQINCzgEbYM7jtCBAgQKAsAY2wrHrIhgABAgR6FtAIewa3HQEC
BAiUJaARllUP2RAgQIBAzwIaYc/gtiNAgACBsgQ0wrLqIRsCBAgQ6FlAI+wZ3HYECBAgUJaARlhWPWRDgAABAj0LaIQ9g9uOAAECBMoS0AjLqodsCBAgQKBnAY2wZ3DbESBAgEBZAhphWfWQDQECBAj0LKAR9gxuOwIECBAoS0AjLKsesiFAgACBngU0wp7BbUeAAAECZQlohGXVQzYECBAg0LOARtgzuO0IECBAoCwBjbCsesiGAAECBHoW0Ah7BrcdAQIECJQloBGWVQ/ZECBAgEDPAhphz+C2I0CAAIGyBDTCsuohGwIECBDoWUAj7BncdgQIECBQloBGWFY9ZEOAAAECPQtohD2D244AAQIEyhLQCMuqh2wIECBAoGcBjbBncNsRIECAQFkCGmFZ9ZANAQIECPQsoBH2DG47AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZBdYyb5DMRusrKxs2rTptNNO27Jly44dO3bt2rV79+49e/aEBPfu3btv3761tbWnn3764MGDb7zxxttvv3306NFicpcIAQIECOQS8Fes5ZIVlwABAgSqENAIqyiTJAkQIEAgl4BGmEtWXAIECBCoQkAjrKJMkiRAgACBXAIaYS5ZcQkQIECgCgGNsIoySZIAAQIEcglohLlkxSVAgACBKgQ0wirKJEkCBAgQyCWgEeaSFZcAAQIEqhDQCKsokyQJECBAIJeARphLVlwCBAgQqEJAI6yiTJIkQIAAgVwCGmEuWXEJECBAoAoBjbCKMkmSAAECBHIJaIS5ZMUlQIAAgSoENMIqyiRJAgQIEMgloBHmkhWXAAECBKoQ0AirKJMkCRAgQCCXgEaYS1ZcAgQIEKhCQCOsokySJECAAIFcAhphLllxCRAgQKAKAY2wijJJkgABAgRyCWiEuWTFJUCAAIEqBDTCKsokSQIECBDIJaAR5pIVlwABAgSqENAIqyiTJAkQIEAgl4BGmEtWXAIECBCoQkAjrKJMkiRAgACBXAIaYS5ZcQkQIECgCgGNsIoySZIAAQIEcglohLlkxSVAgACBKgQ0wirKJEkCBAgQyCWgEeaSFZcAAQIEqhDQCKsokyQJECBAIJeARphLVlwCBAgQqEJAI6yiTJIkQIAAgVwCGmEuWXEJECBAoAoBjbCKMkmSAAECBHIJaIS5ZMUlQIAAgSoENMIqyiRJAgQIEMgloBHmkhWXAAECBKoQ0AirKJMkCRAgQCCXgEaYS1ZcAgQIEKhCQCOsokySJECAAIFcAhphLllxCRAgQKAKAY2wijJJkgABAgRyCWiEuWTFJUCAAIEqBDTCKsokSQIECBDIJaAR5pIVlwABAgSqENAIqyiTJAkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQILCAwMoCawe0dGVlZXV1dfPmzVu3bt2+ffuuXbt27969Z8+ecMS9e/fu27dvbW3tmWeeOXDgwOHDh48cOXL06NEBnd5RCBAgQKAV8FestRZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIECLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIE
CLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIECLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEBi6wMnO+lZWVTZs2nXzyyZs3bz799NPDn7722muHDx9+66233n777aNHj86sP/ZliBAuP+WUU0477bQmwhtvvPHmm2+GIPNEODbmZGaS2Kmnnhoin3HGGWHy1VdfDZH/+9//TtI73oWd85OcQ9gtW7ZccMEFO3fuvPTSS8NVjz/++D/+8Y9//vOfBw8eDBvNn/+EMSAExve9731nnnlmiPbKK6+8/vrrATNQzIm5TuZBI3xNnGe2mGiHLcLXOhF6+KPgEL6a+yFU7ayzzgr7/uc//5nUrrkrlrkxjneQsHUgCruH+3BS2YsvvvhjH/vYZZddFi55+OGH//znP//973+f1Dfc5/Pf5MfbMdP85CCrq6vh5g+1DobhOO9///vDdi+99FK4OYNnuLXCg3DkyJFQ9ByYcx7tnbvy/27LyTeQs88+e+vWrR/4wAfC5S+++OKBAwcOHTo0+ZYyeQqy3qLBbYIWbryQxuTR/uhHPxpugMsvvzyk9Pvf/z7cBvv375885iG9cFtOGNc3nEQOt9Z0RcIxzz333BD2hRdeCIedrku4tUJp1o85LRwYQ+YTxpB8YDznnHNC/PPOOy8se/7550P8l19+OWCGhCffUialnw4yz7g5SPOdalKvsNG2bdtChH/961+T7QJO8x1s0eNMZ9J8Vwy38WSv888/P5Tmoosu2rFjR1j59NNPP/vss+GpfO655yb3zOT2nhxzOlTCsb9iLSGmUAQIECBQn4BGWF/NZEyAAAECCQU0woSYQhEgQIBAfQIaYX01kzEBAgQIJBTQCBNiCkWAAAEC9QlohPXVTMYECBAgkFBAI0yIKRQBAgQI1CegEdZXMxkTIECAQEIBjTAhplAECBAgUJ+ARlhfzWRMgAABAgkFNMKEmEIRIECAQH0CGmF9NZMxAQIECCQU0AgTYgpFgAABAvUJaIT11UzGBAgQIJBQQCNMiCkUAQIECNQnoBHWVzMZEyBAgEBCAY0wIaZQBAgQIFCfgEZYX81kTIAAAQIJBTTChJhCESBAgEB9AhphfTWTMQECBAgkFNAIE2IKRYAAAQL1CWiE9dVMxgQIECCQUEAjTIgpFAECBAjUJ6AR1lczGRMgQIBAQgGNMCGmUAQIECBQn4BGWF/NZEyAAAECCQU0woSYQhEgQIBAfQIaYX01kzEBAgQIJBTQCBNiCkWAAAEC9QlohPXVTMYECBAgkFBAI0yIKRQBAgQI1CegEdZXMxkTIECAQEIBjTAhplAECBAgUJ+ARlhfzWRMgAABAgkFNMKEmEIRIECAQH0CGmF9NZMxAQIECCQU0AgTYgpFgAABAvUJaIT11UzGBAgQIJBQQCNMiCkUAQIECNQnoBHWVzMZEyBAgEBCAY0wIaZQBAgQIFCfgEZYX81kTIAAAQIJBTTChJhCESBAgEB9AhphfTWTMQECBAgkFNAIE2IKRYAAAQL1CWiE9dVMxgQIECCQUEAjTIgpFAECBAjUJ6AR1lcz
GRMgQIBAQgGNMCGmUAQIECBQn4BGWF/NZEyAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQWETgc5/73CLLI2uXjxAJaooAAQIECPQj8NBDDy250fIRlkzA5QQIECBA4HgC3f89wt/97nfHu3jO+eUjzLmRZQQIECBAgAABAgQIECAwJoHcb0CmjZ82WrTOPWwR3XehyUKSLCSNhehmFg/gCDMn8pIAgYUFcr8BmTZ+2mhRrB62iO670GQhSRaSxkJ0M4sHcISZE3lJoH+B+HuEW7ZsueWWW/bv3//8O19hcOutt4bJ+fNbPsKce+V+AzJt/LTRokQ9bBHdd6HJQpIsJI2F6GYWD+AIMyfykkApAr/85S+/853vfPCDH9z0zlcYhJdhcv78lo8w/15WEiBAgACBxALRf8yMTh5v4+ji6OTxIpgnQGBgAqW9o5kpn0xhm5shd/xmozDoc6/pfXsex381+u9///vb3/72hRdeOPmJMAzCyzA5f3LLR5h/LysJEKhC4Fvf+lZReWbKJ1PYhi53/GajMOhzr+l9ex7HG+FXv/rVbdu23X///c+98xUG4WWYnD+55SPMv5eVBAhUIVDa74Qy5ZMpbFPi3PGbjcKgz72m9zUmQIAAAQIENlpg+c98Lh9hHYOswcO+aeOnjRZl6WGL6L4LTRaSZCFpLEQ3s3gAR5g5kZcEGoENeVcy/qvRn//85y+88MI111wTfiMavsIg/GsUYbLJtXOwfIR1tsgaPOybNn7aaFGWHraI7rvQZCFJFpLGQnQziwdwhJkTeUmgESjoXcno74Wjk032M4Po4ujkzIXzvIzGiU7OE+3YNdFQ0cljrz12JnphdPLYa+eciUaLTs4ZMMeyaD7RyRy7NzGjO0Ynm0tKG0SzjU6Wlrl8CHQK3HzzzZ1rki+I/0S4/Gc+l4+wzlGzBg/7po2fNlqUpYctovsuNFlIkoWksRDdzOIBHGHmRF4SaAS+//3vN+MNHoQ3IX74wx/+5S9/Cb8gDV9hEF6GyfnTWj7COntlDR72TRs/bbQoSw9bRPddaLKQJAtJYyG6mcUDOMLMibwkQIAAAQIECBAoTCD8I2fJf9fo8umt7502ftpo0cx72CK670KThSRZSBoL0c0srusIpWWbKZ9MYZvS547fbBQGfe41ve8GjuPvES7/sbTlI6yDkjV42Ddt/LTRoiw9bBHdd6HJQpIsJI2F6GYW13WE0rLNlE+msE3pc8dvNgqDPvea3re4cfQTaNHJ46UeXRydPF6EdeajcaKT6wRZ54+ioaKT6wRp/ih6YXSyuWTRQTRadHLRyAnXR/OJTibc9NhQ0R2jk8deW8hMNNvoZAkJRxOLTvaTbXTr6ORC+UQjRCcXCtssjoaKTjaXnPAgGjY6ecJblHZh/CfC5T+WtnyEdaSyBg/7po2fNlqUpYctovsuNFlIkoWksRDdzOK6jlBatpnyyRS2KX3u+M1GYdDnXtP7FjcOvyP2qdFlPjQ7XdHlMaejRcc9bBHdd6HJQpIsJI2F6GYW13WE0rLNlE+msE3pc8dvNgqDPvea3teYAAECBAgQKEkg/BOBT43u378//MVy4SsMbr311mByYiVaHrNz3x626Myhc0EhSTZ/b8XOnTt//etfh39NNvxvGHfmX86CQiTnBCkt20z5ZArbIOeO32wUBn3uNdm30Kdy+f++/PIRpgszM84aPOyVNn7aaDMUk5c9bBHdd6HJQpJ86KGHJmnfc889n//851dXVz/xiU/cd999C51lYxcXIjknQmnZZsonU9gGOXf8ZqMw6HOvyb6FPpXRDwhFJ6f5psfRxdHJ6avmHEfjRCfnDDizLBoqOjlzYfRl9MLoZPTyeSaj0aKT80TLtCaaT3QyUwKTsM0j
N711M5l161TBpzNvYkYnmz/dwEE0sehkP0lGt45OLpRPNEJ0cqGwzeJoqOhkc8kJD6Jho5MnvMXMhc0DOL1LMzmzOMdLnxqNqKb90FTaaJF0K/mUVw8OUZyZyR07dtx4442f/exnH3/88Ztuuin8RBjGJ5988syykl8WIjknUWnZZsonU9gGOXf8ZqMw6HOvyb4b/lTGG+Hy/3355SNMF2ZmnDV42Ctt/LTRZigmL3vYIrrvQpOFJPmNb3zj1Vdf/fCHPxz+N7w1uH379q985Stf+9rXFjrLxi4uRHJOhNKyzZRPprANcu74zUZh0Odek30H8FROAxp3C/ziF7/oXmQFAQI1C3jMa67ee7lfffXVTzzxRPjA5E9/+tNzzz13Mn3vvfe+9+cb/P/hJ/cf/OAH5513XqY80n6E6fXXXz906NCB977eeuutyTBh8tdee+1TTz31t7/97corrwxP4IsvvviHP/zhIx/5SMItlg9V+E21/AFFiArkflqjm64zmfbpbjbK/Zj3+Yz3/6iWdpO8W9a1tbXwbfSMM84IPyOHf6/84x//ePiDhd66PHjw4Hvf+f/f/zf3zTKD8F3+61//+mOPPXb77beH33EtEyp6bXPSJB8s/PSnPx0S/tKXvjTZqwke3frEJh999NELLrhg9+7d4dd94R2v8L7Xpz71qQceeODEomW6avmbKkliWe/MJBl2BqnrCLmf1k6umQXNA5jk6W6C537M+3zG+39US7tJ3i3rb3/7202b3n37MLyP8sgjj4T3UZobqKn9OoOf/exn3/zmN9dZsMwfTTJZWVn5zGc+85vf/OZXv/rVFVdcsUzAmWubk6b6CNNZZ51122233X333RdeeGETfGbTZV42Me+4444mTpBpxiUMlr+pkpwi652ZJMPOIHUdIffT2sk1s6B5WFI93U38rI95k3YPz3j/j2ppN8m7Nb3hhhvCt9EvfvGLk9dnnnnm3r17X3nllabknYOtW7cGzW3btnWuPIEFzT0xuTb8wBrS++Mf/3gCoaKXPPnkk5MPFoafOJsPFk4/NtGrOic/+clP/ulPf/rrX//auXLRBXfdddd3v/vdU089dXJh+AzkF77whQcffHDROFnXL39TJUkv652ZJMPOIHUdIffT2sk1syDT093skukx7/MZ7/9RLe0maap5UnhrcPpNpvDjVzmfrLvzzjvbRN8bXXzxxe8Nl/3/q6666vrrr//e97734x//+Ec/+tGHPvShsOMll1yybNyTTtq8efN11123fJyZCKeffnrI9pxzzpnM/+QnPwnvFIZPJM8s2/CXJd9UG44z1ARyP62LuuV7uptMcjzmPT/jPT+qpd0kTSkNCBAgQIAAAQIECBAgQGDoAv8DNIwUyHDTngMAAAAASUVORK5CYII=", + "text/plain": [ + "" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Making a gel to show the PCR products\n", + "im = gel(\n", + " [\n", + " GeneRuler_1kb_plus,\n", + " [pcr_product_F1],\n", + " [pcr_product_F2],\n", + " [pcr_product_F3],\n", + " [pcr_product_BAC],\n", + " ]\n", + ")\n", + "im.rotate(90, expand=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: 
True\n", + "size: 23827\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 28\n", + "/molecule_type=DNA\n", + "Dseq(o23827)\n", + "GCAG..ccgc\n", + "CGTC..ggcg\n" + ] + } + ], + "source": [ + "# Performing the Gibson Assembly. Note that the assembly class parameters should be given as a list.\n", + "\n", + "assembled = Assembly([Dseqrecord(pcr_product_F1), Dseqrecord(pcr_product_F2), Dseqrecord(pcr_product_F3), Dseqrecord(pcr_product_BAC)])\n", + "assembled_circ = assembled.assemble_circular()\n", + "\n", + "# Printing out the Gibson Assembly product\n", + "print(assembled_circ[0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/Example_Restriction.ipynb b/docs/notebooks/Example_Restriction.ipynb index 29e0ca56..b50edcec 100755 --- a/docs/notebooks/Example_Restriction.ipynb +++ b/docs/notebooks/Example_Restriction.ipynb @@ -1,1017 +1,1017 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example of a Plasmid Restriction/Ligation Cloning\n", - "> Visit the full library documentation [here](https://bjornfjohansson.github.io/pydna/)\n", - "\n", - "This example showcases a workflow of modelling molecular cloning with restriction enzymes, PCR, and ligases, to clone gene fragments into plasmids. This example constructs a synthetic plasmid by cloning the ase1 gene, which encodes a microtubule associated protein responsible for mitotic spindle assembly, into the pFA6a-kanMX6 cloning vector:\n", - "\n", - "1. The ase1 gene fragment is first cloned from a portion of the _S. 
pombe_ genome through PCR:\n", - "2. The pFA6a-kanMX6 cloning vector is then cleaved with AscI and SalI. The ase1 gene fragment is also cleaved with SalI and AscI\n", - "3. The fragment is ligated with the linearized pFA6a-kanMX6 vector.\n", - "\n", - "Source files can be found alongside this notebook, if you would like to follow along. Annotations are made alongside the code to describe key steps." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Importing all necessary classes and methods\n", - "\n", - "from pydna.parsers import parse\n", - "from pydna.tm import tm_default\n", - "from pydna.amplify import pcr\n", - "from pydna.dseqrecord import Dseqrecord\n", - "from Bio.Restriction import SalI, AscI" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS pFA6a-kanMX6 3938 bp ds-DNA circular SYN 16-JUN-2022\n", - "DEFINITION synthetic circular DNA.\n", - "ACCESSION .\n", - "VERSION .\n", - "KEYWORDS pFA6a-kanMX6.\n", - "SOURCE synthetic DNA construct\n", - " ORGANISM synthetic DNA construct\n", - " .\n", - "REFERENCE 1 (bases 1 to 3938)\n", - " AUTHORS Bahler J, Wu JQ, Longtine MS, Shah NG, McKenzie A 3rd, Steever AB,\n", - " Wach A, Philippsen P, Pringle JR\n", - " TITLE Heterologous modules for 
efficient and versatile PCR-based gene\n", - " targeting in Schizosaccharomyces pombe.\n", - " JOURNAL Yeast. 1998 Jul;14(10):943-51.\n", - " PUBMED 9717240\n", - "REFERENCE 2 (bases 1 to 3938)\n", - " AUTHORS .\n", - " TITLE Direct Submission\n", - " JOURNAL Exported Jun 16, 2022 from SnapGene Server 1.1.58\n", - " http://www.snapgene.com\n", - "FEATURES Location/Qualifiers\n", - " source 1..3938\n", - " /organism=\"synthetic DNA construct\"\n", - " /mol_type=\"other DNA\"\n", - " primer_bind complement(35..52)\n", - " /label=\"L4440\"\n", - " /note=\"L4440 vector, forward primer\"\n", - " rep_origin complement(206..794)\n", - " /direction=LEFT\n", - " /label=\"ori\"\n", - " /note=\"high-copy-number ColE1/pMB1/pBR322/pUC origin of\n", - " replication\"\n", - " primer_bind complement(286..305)\n", - " /label=\"pBR322ori-F\"\n", - " /note=\"pBR322 origin, forward primer\"\n", - " CDS complement(965..1825)\n", - " /codon_start=1\n", - " /gene=\"bla\"\n", - " /product=\"beta-lactamase\"\n", - " /label=\"AmpR\"\n", - " /note=\"confers resistance to ampicillin, carbenicillin, and\n", - " related antibiotics\"\n", - " /translation=\"MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI\n", - " ELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS\n", - " PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRW\n", - " EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSA\n", - " LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS\n", - " LIKHW\"\n", - " primer_bind 1588..1607\n", - " /label=\"Amp-R\"\n", - " /note=\"Ampicillin resistance gene, reverse primer\"\n", - " promoter complement(1826..1930)\n", - " /gene=\"bla\"\n", - " /label=\"AmpR promoter\"\n", - " primer_bind 1998..2016\n", - " /label=\"pBRforEco\"\n", - " /note=\"pBR322 vectors, upsteam of EcoRI site, forward\n", - " primer\"\n", - " primer_bind complement(2054..2076)\n", - " /label=\"pGEX 3'\"\n", - " /note=\"pGEX vectors, reverse primer\"\n", - " primer_bind 2176..2195\n", - " 
/label=\"pRS-marker\"\n", - " /note=\"pRS vectors, use to sequence yeast selectable\n", - " marker\"\n", - " promoter 2276..2294\n", - " /label=\"SP6 promoter\"\n", - " /note=\"promoter for bacteriophage SP6 RNA polymerase\"\n", - " primer_bind 2276..2293\n", - " /label=\"SP6\"\n", - " /note=\"SP6 promoter, forward primer\"\n", - " gene 2407..3763\n", - " /label=\"kanMX\"\n", - " /note=\"yeast selectable marker conferring kanamycin\n", - " resistance (Wach et al., 1994)\"\n", - " promoter 2407..2750\n", - " /label=\"TEF promoter\"\n", - " /note=\"Ashbya gossypii TEF promoter\"\n", - " CDS 2751..3560\n", - " /codon_start=1\n", - " /gene=\"aph(3')-Ia\"\n", - " /product=\"aminoglycoside phosphotransferase\"\n", - " /label=\"KanR\"\n", - " /note=\"confers resistance to kanamycin\"\n", - " /translation=\"MGKEKTHVSRPRLNSNMDADLYGYKWARDNVGQSGATIYRLYGKP\n", - " DAPELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTAIPGKTA\n", - " FQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLAQAQSRMNNGLVDASD\n", - " FDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFSLDNLIFDEGKLIGCIDVGRVGIAD\n", - " RYQDLAILWNCLGEFSPSLQKRLFQKYGIDNPDMNKLQFHLMLDEFF\"\n", - " primer_bind complement(2818..2837)\n", - " /label=\"Kan-R\"\n", - " /note=\"Kanamycin resistance gene, reverse primer\"\n", - " terminator 3566..3763\n", - " /label=\"TEF terminator\"\n", - " /note=\"Ashbya gossypii TEF terminator\"\n", - " primer_bind complement(3867..3886)\n", - " /label=\"T7\"\n", - " /note=\"T7 promoter, forward primer\"\n", - " promoter complement(3868..3886)\n", - " /label=\"T7 promoter\"\n", - " /note=\"promoter for bacteriophage T7 RNA polymerase\"\n", - "ORIGIN\n", - " 1 gaggcggttt gcgtattggg cgctcttccg cttcctcgct cactgactcg ctgcgctcgg\n", - " 61 tcgttcggct gcggcgagcg gtatcagctc actcaaaggc ggtaatacgg ttatccacag\n", - " 121 aatcagggga taacgcagga aagaacatgt gagcaaaagg ccagcaaaag gccaggaacc\n", - " 181 gtaaaaaggc cgcgttgctg gcgtttttcc ataggctccg cccccctgac gagcatcaca\n", - " 241 aaaatcgacg ctcaagtcag aggtggcgaa acccgacagg actataaaga 
taccaggcgt\n", - " 301 ttccccctgg aagctccctc gtgcgctctc ctgttccgac cctgccgctt accggatacc\n", - " 361 tgtccgcctt tctcccttcg ggaagcgtgg cgctttctca tagctcacgc tgtaggtatc\n", - " 421 tcagttcggt gtaggtcgtt cgctccaagc tgggctgtgt gcacgaaccc cccgttcagc\n", - " 481 ccgaccgctg cgccttatcc ggtaactatc gtcttgagtc caacccggta agacacgact\n", - " 541 tatcgccact ggcagcagcc actggtaaca ggattagcag agcgaggtat gtaggcggtg\n", - " 601 ctacagagtt cttgaagtgg tggcctaact acggctacac tagaagaaca gtatttggta\n", - " 661 tctgcgctct gctgaagcca gttaccttcg gaaaaagagt tggtagctct tgatccggca\n", - " 721 aacaaaccac cgctggtagc ggtggttttt ttgtttgcaa gcagcagatt acgcgcagaa\n", - " 781 aaaaaggatc tcaagaagat cctttgatct tttctacggg gtctgacgct cagtggaacg\n", - " 841 aaaactcacg ttaagggatt ttggtcatga gattatcaaa aaggatcttc acctagatcc\n", - " 901 ttttaaatta aaaatgaagt tttaaatcaa tctaaagtat atatgagtaa acttggtctg\n", - " 961 acagttacca atgcttaatc agtgaggcac ctatctcagc gatctgtcta tttcgttcat\n", - " 1021 ccatagttgc ctgactcccc gtcgtgtaga taactacgat acgggagggc ttaccatctg\n", - " 1081 gccccagtgc tgcaatgata ccgcgagacc cacgctcacc ggctccagat ttatcagcaa\n", - " 1141 taaaccagcc agccggaagg gccgagcgca gaagtggtcc tgcaacttta tccgcctcca\n", - " 1201 tccagtctat taattgttgc cgggaagcta gagtaagtag ttcgccagtt aatagtttgc\n", - " 1261 gcaacgttgt tgccattgct acaggcatcg tggtgtcacg ctcgtcgttt ggtatggctt\n", - " 1321 cattcagctc cggttcccaa cgatcaaggc gagttacatg atcccccatg ttgtgcaaaa\n", - " 1381 aagcggttag ctccttcggt cctccgatcg ttgtcagaag taagttggcc gcagtgttat\n", - " 1441 cactcatggt tatggcagca ctgcataatt ctcttactgt catgccatcc gtaagatgct\n", - " 1501 tttctgtgac tggtgagtac tcaaccaagt cattctgaga atagtgtatg cggcgaccga\n", - " 1561 gttgctcttg cccggcgtca atacgggata ataccgcgcc acatagcaga actttaaaag\n", - " 1621 tgctcatcat tggaaaacgt tcttcggggc gaaaactctc aaggatctta ccgctgttga\n", - " 1681 gatccagttc gatgtaaccc actcgtgcac ccaactgatc ttcagcatct tttactttca\n", - " 1741 ccagcgtttc tgggtgagca aaaacaggaa ggcaaaatgc cgcaaaaaag ggaataaggg\n", - " 1801 cgacacggaa 
atgttgaata ctcatactct tcctttttca atattattga agcatttatc\n", - " 1861 agggttattg tctcatgagc ggatacatat ttgaatgtat ttagaaaaat aaacaaatag\n", - " 1921 gggttccgcg cacatttccc cgaaaagtgc cacctgacgt ctaagaaacc attattatca\n", - " 1981 tgacattaac ctataaaaat aggcgtatca cgaggccctt tcgtctcgcg cgtttcggtg\n", - " 2041 atgacggtga aaacctctga cacatgcagc tcccggagac ggtcacagct tgtctgtaag\n", - " 2101 cggatgccgg gagcagacaa gcccgtcagg gcgcgtcagc gggtgttggc gggtgtcggg\n", - " 2161 gctggcttaa ctatgcggca tcagagcaga ttgtactgag agtgcaccat atggacatat\n", - " 2221 tgtcgttaga acgcggctac aattaataca taaccttatg tatcatacac atacgattta\n", - " 2281 ggtgacacta tagaacgcgg ccgccagctg aagcttcgta cgctgcaggt cgacggatcc\n", - " 2341 ccgggttaat taaggcgcgc cagatctgtt tagcttgcct cgtccccgcc gggtcacccg\n", - " 2401 gccagcgaca tggaggccca gaataccctc cttgacagtc ttgacgtgcg cagctcaggg\n", - " 2461 gcatgatgtg actgtcgccc gtacatttag cccatacatc cccatgtata atcatttgca\n", - " 2521 tccatacatt ttgatggccg cacggcgcga agcaaaaatt acggctcctc gctgcagacc\n", - " 2581 tgcgagcagg gaaacgctcc cctcacagac gcgttgaatt gtccccacgc cgcgcccctg\n", - " 2641 tagagaaata taaaaggtta ggatttgcca ctgaggttct tctttcatat acttcctttt\n", - " 2701 aaaatcttgc taggatacag ttctcacatc acatccgaac ataaacaacc atgggtaagg\n", - " 2761 aaaagactca cgtttcgagg ccgcgattaa attccaacat ggatgctgat ttatatgggt\n", - " 2821 ataaatgggc tcgcgataat gtcgggcaat caggtgcgac aatctatcga ttgtatggga\n", - " 2881 agcccgatgc gccagagttg tttctgaaac atggcaaagg tagcgttgcc aatgatgtta\n", - " 2941 cagatgagat ggtcagacta aactggctga cggaatttat gcctcttccg accatcaagc\n", - " 3001 attttatccg tactcctgat gatgcatggt tactcaccac tgcgatcccc ggcaaaacag\n", - " 3061 cattccaggt attagaagaa tatcctgatt caggtgaaaa tattgttgat gcgctggcag\n", - " 3121 tgttcctgcg ccggttgcat tcgattcctg tttgtaattg tccttttaac agcgatcgcg\n", - " 3181 tatttcgtct cgctcaggcg caatcacgaa tgaataacgg tttggttgat gcgagtgatt\n", - " 3241 ttgatgacga gcgtaatggc tggcctgttg aacaagtctg gaaagaaatg cataagcttt\n", - " 3301 tgccattctc accggattca gtcgtcactc 
atggtgattt ctcacttgat aaccttattt\n", - " 3361 ttgacgaggg gaaattaata ggttgtattg atgttggacg agtcggaatc gcagaccgat\n", - " 3421 accaggatct tgccatccta tggaactgcc tcggtgagtt ttctccttca ttacagaaac\n", - " 3481 ggctttttca aaaatatggt attgataatc ctgatatgaa taaattgcag tttcatttga\n", - " 3541 tgctcgatga gtttttctaa tcagtactga caataaaaag attcttgttt tcaagaactt\n", - " 3601 gtcatttgta tagttttttt atattgtagt tgttctattt taatcaaatg ttagcgtgat\n", - " 3661 ttatattttt tttcgcctcg acatcatctg cccagatgcg aagttaagtg cgcagaaagt\n", - " 3721 aatatcatgc gtcaatcgta tgtgaatgct ggtcgctata ctgctgtcga ttcgatacta\n", - " 3781 acgccgccat ccagtttaaa cgagctcgaa ttcatcgatg atatcagatc cactagtggc\n", - " 3841 ctatgcggcc gcggatctgc cggtctccct atagtgagtc gtattaattt cgataagcca\n", - " 3901 ggttaacctg cattaatgaa tcggccaacg cgcgggga\n", - "//\n", - "LOCUS CU329670 4538 bp DNA linear PLN 26-APR-2024\n", - "DEFINITION Schizosaccharomyces pombe strain 972h- genome assembly, chromosome:\n", - " I.\n", - "ACCESSION CU329670\n", - "VERSION CU329670.1\n", - "DBLINK BioProject: PRJNA13836\n", - " BioSample: SAMEA3138176\n", - "KEYWORDS .\n", - "SOURCE Schizosaccharomyces pombe (fission yeast)\n", - " ORGANISM Schizosaccharomyces pombe\n", - " Eukaryota; Fungi; Dikarya; Ascomycota; Taphrinomycotina;\n", - " Schizosaccharomycetes; Schizosaccharomycetales;\n", - " Schizosaccharomycetaceae; Schizosaccharomyces.\n", - "REFERENCE 1 (bases 1 to 4538)\n", - " AUTHORS Lang,B.F.\n", - " TITLE The mitochondrial genome of the fission yeast Schizosaccharomyces\n", - " pombe: highly homologous introns are inserted at the same position\n", - " of the otherwise less conserved cox1 genes in Schizosaccharomyces\n", - " pombe and Aspergillus nidulans\n", - " JOURNAL EMBO J 3 (9), 2129-2136 (1984)\n", - " PUBMED 6092057\n", - "REFERENCE 2 (bases 1 to 4538)\n", - " AUTHORS Lang,B.F., Ahne,F. and Bonen,L.\n", - " TITLE The mitochondrial genome of the fission yeast Schizosaccharomyces\n", - " pombe. 
The cytochrome b gene has an intron closely related to the\n", - " first two introns in the Saccharomyces cerevisiae cox1 gene\n", - " JOURNAL J Mol Biol 184 (3), 353-366 (1985)\n", - " PUBMED 4046021\n", - "REFERENCE 3 (bases 1 to 4538)\n", - " AUTHORS Lang,B.F., Cedergren,R. and Gray,M.W.\n", - " TITLE The mitochondrial genome of the fission yeast, Schizosaccharomyces\n", - " pombe. Sequence of the large-subunit ribosomal RNA gene, comparison\n", - " of potential secondary structure in fungal mitochondrial\n", - " large-subunit rRNAs and evolutionary considerations\n", - " JOURNAL Eur J Biochem 169 (3), 527-537 (1987)\n", - " PUBMED 2446871\n", - "REFERENCE 4 (bases 1 to 4538)\n", - " AUTHORS Trinkl,H., Lang,B.F. and Wolf,K.\n", - " TITLE Nucleotide sequence of the gene encoding the small ribosomal RNA in\n", - " the mitochondrial genome of the fission yeast Schizosaccharomyces\n", - " pombe\n", - " JOURNAL Nucleic Acids Res 17 (16), 6730 (1989)\n", - " PUBMED 2780299\n", - "REFERENCE 5 (bases 1 to 4538)\n", - " AUTHORS Wood,V., Gwilliam,R., Rajandream,M.A., Lyne,M., Lyne,R., Stewart,A.,\n", - " Sgouros,J., Peat,N., Hayles,J., Baker,S., Basham,D., Bowman,S.,\n", - " Brooks,K., Brown,D., Brown,S., Chillingworth,T., Churcher,C.,\n", - " Collins,M., Connor,R., Cronin,A., Davis,P., Feltwell,T., Fraser,A.,\n", - " Gentles,S., Goble,A., Hamlin,N., Harris,D., Hidalgo,J., Hodgson,G.,\n", - " Holroyd,S., Hornsby,T., Howarth,S., Huckle,E.J., Hunt,S., Jagels,K.,\n", - " James,K., Jones,L., Jones,M., Leather,S., McDonald,S., McLean,J.,\n", - " Mooney,P., Moule,S., Mungall,K., Murphy,L., Niblett,D., Odell,C.,\n", - " Oliver,K., O'Neil,S., Pearson,D., Quail,M.A., Rabbinowitsch,E.,\n", - " Rutherford,K., Rutter,S., Saunders,D., Seeger,K., Sharp,S.,\n", - " Skelton,J., Simmonds,M., Squares,R., Squares,S., Stevens,K.,\n", - " Taylor,K., Taylor,R.G., Tivey,A., Walsh,S., Warren,T., Whitehead,S.,\n", - " Woodward,J., Volckaert,G., Aert,R., Robben,J., Grymonprez,B.,\n", - " 
Weltjens,I., Vanstreels,E., Rieger,M., Schafer,M., Muller-Auer,S.,\n", - " Gabel,C., Fuchs,M., Dusterhoft,A., Fritzc,C., Holzer,E., Moestl,D.,\n", - " Hilbert,H., Borzym,K., Langer,I., Beck,A., Lehrach,H., Reinhardt,R.,\n", - " Pohl,T.M., Eger,P., Zimmermann,W., Wedler,H., Wambutt,R.,\n", - " Purnelle,B., Goffeau,A., Cadieu,E., Dreano,S., Gloux,S., Lelaure,V.,\n", - " Mottier,S., Galibert,F., Aves,S.J., Xiang,Z., Hunt,C., Moore,K.,\n", - " Hurst,S.M., Lucas,M., Rochet,M., Gaillardin,C., Tallada,V.A.,\n", - " Garzon,A., Thode,G., Daga,R.R., Cruzado,L., Jimenez,J., Sanchez,M.,\n", - " del Rey,F., Benito,J., Dominguez,A., Revuelta,J.L., Moreno,S.,\n", - " Armstrong,J., Forsburg,S.L., Cerutti,L., Lowe,T., McCombie,W.R.,\n", - " Paulsen,I., Potashkin,J., Shpakovski,G.V., Ussery,D., Barrell,B.G.\n", - " and Nurse,P.\n", - " TITLE The genome sequence of Schizosaccharomyces pombe\n", - " JOURNAL Nature 415 (6874), 871-880 (2002)\n", - " PUBMED 11859360\n", - " REMARK Erratum:[Nature 2003 Jan 2;421(6918):94. 
Cerrutti L [corrected to\n", - " Cerutti L]]\n", - "REFERENCE 6\n", - " AUTHORS Wood,V., Gwilliam,R., Rajandream,M.A., Lyne,M., Lyne,R., Stewart,A.,\n", - " Sgouros,J., Peat,N., Hayles,J., Baker,S., Basham,D., Bowman,S.,\n", - " Brooks,K., Brown,D., Brown,S., Chillingworth,T., Churcher,C.,\n", - " Collins,M., Connor,R., Cronin,A., Davis,P., Feltwell,T., Fraser,A.,\n", - " Gentles,S., Goble,A., Hamlin,N., Harris,D., Hidalgo,J., Hodgson,G.,\n", - " Holroyd,S., Hornsby,T., Howarth,S., Huckle,E.J., Hunt,S., Jagels,K.,\n", - " James,K., Jones,L., Jones,M., Leather,S., McDonald,S., McLean,J.,\n", - " Mooney,P., Moule,S., Mungall,K., Murphy,L., Niblett,D., Odell,C.,\n", - " Oliver,K., O'Neil,S., Pearson,D., Quail,M.A., Rabbinowitsch,E.,\n", - " Rutherford,K., Rutter,S., Saunders,D., Seeger,K., Sharp,S.,\n", - " Skelton,J., Simmonds,M., Squares,R., Squares,S., Stevens,K.,\n", - " Taylor,K., Taylor,R.G., Tivey,A., Walsh,S., Warren,T., Whitehead,S.,\n", - " Woodward,J., Volckaert,G., Aert,R., Robben,J., Grymonprez,B.,\n", - " Weltjens,I., Vanstreels,E., Rieger,M., Schafer,M., Muller-Auer,S.,\n", - " Gabel,C., Fuchs,M., Dusterhoft,A., Fritzc,C., Holzer,E., Moestl,D.,\n", - " Hilbert,H., Borzym,K., Langer,I., Beck,A., Lehrach,H., Reinhardt,R.,\n", - " Pohl,T.M., Eger,P., Zimmermann,W., Wedler,H., Wambutt,R.,\n", - " Purnelle,B., Goffeau,A., Cadieu,E., Dreano,S., Gloux,S., Lelaure,V.,\n", - " Mottier,S., Galibert,F., Aves,S.J., Xiang,Z., Hunt,C., Moore,K.,\n", - " Hurst,S.M., Lucas,M., Rochet,M., Gaillardin,C., Tallada,V.A.,\n", - " Garzon,A., Thode,G., Daga,R.R., Cruzado,L., Jimenez,J., Sanchez,M.,\n", - " del Rey,F., Benito,J., Dominguez,A., Revuelta,J.L., Moreno,S.,\n", - " Armstrong,J., Forsburg,S.L., Cerutti,L., Lowe,T., McCombie,W.R.,\n", - " Paulsen,I., Potashkin,J., Shpakovski,G.V., Ussery,D., Barrell,B.G.\n", - " and Nurse,P.\n", - " TITLE The genome sequence of Schizosaccharomyces pombe\n", - " JOURNAL Nature 415 (6874), 871-880 (2002)\n", - " PUBMED 11859360\n", - " 
REMARK Erratum:[Nature 2003 Jan 2;421(6918):94. Cerrutti L [corrected to\n", - " Cerutti L]]\n", - "REFERENCE 7 (bases 1 to 4538)\n", - " AUTHORS Schafer,B., Hansen,M. and Lang,B.F.\n", - " TITLE Transcription and RNA-processing in fission yeast mitochondria\n", - " JOURNAL RNA 11 (5), 785-795 (2005)\n", - " PUBMED 15811919\n", - "REFERENCE 8\n", - " AUTHORS Wood,V.\n", - " CONSRTM The Schizosaccharomyces pombe Genome Sequencing Consortium\n", - " TITLE Direct Submission\n", - " JOURNAL Submitted (29-JUN-2007) European Schizosaccharomyces genome\n", - " sequencing project, Sanger Institute, The Wellcome Trust Genome\n", - " Campus, Hinxton, Cambridge CB10 1SA\n", - "REFERENCE 9\n", - " AUTHORS Wood,V. and Rutherford,K.\n", - " CONSRTM PomBase\n", - " TITLE Direct Submission\n", - " JOURNAL Submitted (13-MAR-2024) University of Cambridge, PomBase, Hopkins\n", - " building, Tennis Court Rd, Cambridge, United Kingdom\n", - "COMMENT On or before Jan 26, 2012 this sequence version replaced\n", - " AL672256.4, AL009197.1, AL009227.1, AL021046.4, AL021809.4,\n", - " AL021813.1, AL021817.2, AL031180.3, AL034486.1, AL034565.1,\n", - " AL034583.1, AL035064.1, AL035248.2, AL035254.1, AL035439.1,\n", - " AL096845.1, AL109734.1, AL109738.1, AL109739.1, AL109770.1,\n", - " AL109820.1, AL109831.1, AL109832.1, AL109951.1, AL109988.1,\n", - " AL110469.1, AL110509.2, AL117210.1, AL117212.1, AL117213.1,\n", - " AL117390.1, AL121732.1, AL121741.1, AL121745.2, AL121764.1,\n", - " AL121765.1, AL121770.1, AL122032.1, AL132667.1, AL132675.1,\n", - " AL132714.1, AL132769.1, AL132779.2, AL132798.2, AL132828.1,\n", - " AL132839.1, AL132983.1, AL132984.1, AL133154.2, AL133156.1,\n", - " AL133157.1, AL133225.2, AL133302.1, AL133357.1, AL133359.1,\n", - " AL133360.1, AL133361.1, AL133442.1, AL133498.1, AL133521.1,\n", - " AL133522.1, AL135751.1, AL136078.1, AL136235.1, AL136499.1,\n", - " AL136521.2, AL136538.1, AL137130.1, AL138666.2, AL138854.1,\n", - " AL139315.1, AL157734.1, AL157811.1, 
AL157872.1, AL157917.1,\n", - " AL157993.1, AL157994.1, AL158056.1, AL159180.1, AL159951.1,\n", - " AL162531.1, AL162631.1, AL163031.1, AL163071.1, AL163191.2,\n", - " AL163481.1, AL163529.1, AL353014.1, AL353860.2, AL355012.1,\n", - " AL355013.1, AL355252.1, AL355452.1, AL355632.1, AL355652.1,\n", - " AL355653.1, AL356333.1, AL356335.1, AL357232.1, AL358272.1,\n", - " AL360054.1, AL360094.1, AL390095.1, AL390274.1, AL390814.1,\n", - " AL391713.1, AL391744.1, AL391746.2, AL391783.1, AL441621.1,\n", - " AL441624.1, AL512486.1, AL512487.1, AL512491.1, AL512493.1,\n", - " AL512496.1, AL512549.1, AL512562.1, AL583902.1, AL590562.1,\n", - " AL590582.1, AL590602.1, AL590605.1, AL590902.2, AL590903.1,\n", - " AL691401.1, AL691402.1, AL691405.1, Z49811.1, Z50112.1, Z50113.1,\n", - " Z50142.1, Z50728.2, Z54096.1, Z54142.2, Z54285.2, Z54308.1,\n", - " Z54328.1, Z54354.1, Z54366.1, Z56276.2, Z64354.1, Z66568.2,\n", - " Z67757.1, Z67961.2, Z67998.1, Z67999.1, Z68136.2, Z68144.1,\n", - " Z68166.1, Z68197.2, Z68198.1, Z68887.1, Z69086.1, Z69239.1,\n", - " Z69240.1, Z69368.1, Z69369.1, Z69380.1, Z69725.1, Z69726.1,\n", - " Z69727.1, Z69728.1, Z69729.1, Z69730.1, Z69731.1, Z69795.1,\n", - " Z69796.1, Z69944.1, Z70043.1, Z70690.1, Z70691.1, Z70721.1,\n", - " Z73099.2, Z73100.2, Z81312.1, Z81317.1, Z94864.1, Z95334.1,\n", - " Z95395.1, Z95396.2, Z97185.1, Z97208.1, Z97209.1, Z97210.2,\n", - " Z98056.2, Z98529.1, Z98530.2, Z98531.2, Z98532.1, Z98533.1,\n", - " Z98559.1, Z98560.1, Z98595.1, Z98596.1, Z98597.1, Z98598.1,\n", - " Z98600.1, Z98601.1, Z98602.1, Z98603.1, Z98762.1, Z98763.1,\n", - " Z98849.1, Z98944.1, Z98974.2, Z98975.1, Z98977.4, Z98978.1,\n", - " Z98979.1, Z98980.1, Z98981.3, Z99091.2, Z99126.1, Z99161.1,\n", - " Z99162.1, Z99163.2, Z99164.2, Z99165.1, Z99166.1, Z99167.1,\n", - " Z99168.1, Z99258.1, Z99259.1, Z99260.2, Z99261.1, Z99262.1,\n", - " Z99292.1, Z99295.1, Z99296.2, Z99531.1, Z99532.2, Z99568.2,\n", - " Z99753.1.\n", - "FEATURES Location/Qualifiers\n", - " 
source 1..4538\n", - " /organism=\"Schizosaccharomyces pombe\"\n", - " /mol_type=\"genomic DNA\"\n", - " /strain=\"972h-\"\n", - " /db_xref=\"taxon:4896\"\n", - " /chromosome=\"I\"\n", - " gene <1..676\n", - " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", - " CDS <1..393\n", - " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", - " /codon_start=1\n", - " /product=\"conserved fungal protein\"\n", - " /protein_id=\"CAC21481.2\"\n", - " /translation=\"MMTRMELRPLEIGFSKALTEVAPVTCQCECWDHNLCSSQASEMDL\n", - " IYQSQDTHSCASKQDAVFQLLSETKIPVPNRYRKISHRLSTLSNKKTLKSQLDRFLSSS\n", - " KKLHNDDVNRGDYCFLLSTPVECSASTNSHSYDCLWNFSCNSFPEYSSYSASETSSVAS\n", - " YSYYSGPNPATPSSSSCNLVNANSLDIYLNINNLKKSKSVPRLRGQFMEPVEHNHPLSK\n", - " SLEEQSSFLEQSKDASSNLTACNRSGSSLSSNFYSSRLSKKTSLASLNKSRASLQHKIM\n", - " SLSRNIIRRVFHKPEVHLDPSASILNLSSSHGESNLTNGLLCQNFKLFQDDWLMEDCAP\n", - " DANFTLYTPLQPWEKRSVKPEIRRPRLNPNFFRVFVLEAQMRRAGKLSANTAGRAQLIY\n", - " LPKPAVTFSTSPLHVEL\"\n", - " gene complement(<1..1972)\n", - " /locus_tag=\"SPOM_SPNCRNA.2846\"\n", - " ncRNA complement(<1..1972)\n", - " /ncRNA_class=\"lncRNA\"\n", - " /locus_tag=\"SPOM_SPNCRNA.2846\"\n", - " /product=\"non-coding RNA\"\n", - " 3'UTR 394..676\n", - " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", - " gene 1001..3538\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " 5'UTR 1001..1173\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " CDS join(1174..1597,1645..3416)\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " /codon_start=1\n", - " /product=\"antiparallel microtubule cross-linking factor\n", - " Ase1\"\n", - " /protein_id=\"CAC21482.1\"\n", - " /translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", - " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", - " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", - " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", - " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", - " 
NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", - " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", - " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", - " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", - " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", - " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", - " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", - " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", - " 3'UTR 3417..3538\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " gene complement(3510..>4538)\n", - " /gene=\"ypt71\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", - " 3'UTR complement(3510..3690)\n", - " /gene=\"ypt71\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", - " CDS complement(join(3691..4137,4192..>4290))\n", - " /gene=\"ypt71\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", - " /codon_start=1\n", - " /product=\"GTPase Ypt71\"\n", - " /protein_id=\"CAC21483.1\"\n", - " /translation=\"MSAQKRVFLKVVILGDSGVGKTCLMNQFVNQKFSREYKATIGADF\n", - " LTKDVVVDDKLVTLQLWDTAGQERFQSLGMAFYRGADCCVIVYNVNNSKSFDSVENWRQ\n", - " EFLYQTSQDECAFPFIIVGNQIDKDASKRAVSLHRALDYCKSKHGSNMIHFEASAKENT\n", - " NVTDLFETVSRLALENESSRDDFVNDFSEPLLLSKPLNNTSSCNC\"\n", - " gene 4049..>4538\n", - " /locus_tag=\"SPOM_SPNCRNA.2847\"\n", - " ncRNA 4049..>4538\n", - " /ncRNA_class=\"lncRNA\"\n", - " /locus_tag=\"SPOM_SPNCRNA.2847\"\n", - " /product=\"non-coding RNA\"\n", - "ORIGIN\n", - " 1 atcatcagac gtgtatttca caagccagaa gtgcatttgg atccaagtgc ctccatttta\n", - " 61 aatctctcat cttcgcatgg cgaaagcaac ctgacaaatg gtttgctttg tcaaaatttc\n", - " 121 aagctttttc aggatgattg gttgatggag gattgtgcgc cagatgccaa tttcactttg\n", - " 181 tacaccccgc ttcaaccctg ggaaaagcga agtgtgaaac ctgaaatcag acgtcctcga\n", - " 241 ttaaatccta attttttccg agtatttgtt ttagaagctc aaatgcgacg agctggaaag\n", - " 301 ctatcagcaa acactgctgg ccgagcccag ttaatttacc tcccaaagcc tgccgttacc\n", - " 361 ttctccacta gccctttgca 
tgttgaattg taaaaattta acgcatgact tatatacatt\n", - " 421 tgcattcttc caagctggtt atatttattt tcattttttt ctcacccaat acttttttat\n", - " 481 ccctactgtc tttatggaca atcgactcac aattgtttct ttttgttgta tatgattttt\n", - " 541 tttttaaagg aaatgggttt cgcgatactg ggttgaatcc caattgcggt taatattaca\n", - " 601 taaaataatt ctcccatagt cctagatcct gtctttgaat atgagcaaat aaaagaattg\n", - " 661 aacaaatcat gaatgctttt ctctcttaga tgatattttg tatgcataag tctaattata\n", - " 721 ttgattacga taagacttaa aaagtaagcc tttgtatcct tttaagcagt atttgaattt\n", - " 781 tcttgtatca tattttaggt agagcaaaag ataccagttt gtagaacttt atgtgcttcc\n", - " 841 ttacattggt atatttcagg cacataaata ttcttcaact tacaattcta agtattttgt\n", - " 901 ttatactaaa aggagctgaa taacgtttat acagtgctga cattgaaatc tatttgcttt\n", - " 961 ctttggaata taagcgcatg ctgagttact ttcgcaggcc aagccatatc caaccaccat\n", - " 1021 ttttgtgcca agcttttatg caaggttaat tccttgtact gcttgttatg ttataatata\n", - " 1081 tcaacatctt aacagttttc atatcttcct ttatattcta ttaattgaat ttcaaacatc\n", - " 1141 gttttattga gctcatttac atcaaccggt tcaatgcaaa cagtaatgat ggatgacatt\n", - " 1201 caaagcactg attctattgc tgaaaaagat aatcactcta ataatgaatc taactttact\n", - " 1261 tggaaagcgt ttcgtgaaca agtggaaaag catttttcta aaattgaaag gcttcaccaa\n", - " 1321 gtccttggaa cagatggaga caattcatca ttatttgagt tgtttacaac ggcaatgaat\n", - " 1381 gcccagcttc atgaaatgga acagtgccag aaaaaacttg aagatgactg tcagcaaaga\n", - " 1441 attgattcaa tcagattttt ggtttcctca ttaaagttaa cggatgatac ttctagtctc\n", - " 1501 aaaattgagt ctcctttaat tcagtgtttg aatcgtttgt caatggtaga aggacaatat\n", - " 1561 atggcacagt atgatcaaaa gttaagtacg attaaaggta tgtaatcgtc tttaatttag\n", - " 1621 acttgtgttt taactgatgt atagaaatgt atcacaaatt ggagtcatat tgtaaccgct\n", - " 1681 taggaagtcc gttcgtttta cctgattttg agaattcatt tttatctgat gtatccgatg\n", - " 1741 cttttactga atctttgaga ggacgcatca acgaagccga aaaggagatt gatgcgagat\n", - " 1801 tagaggttat taattccttt gaagaagaaa ttttgggttt gtggtctgaa ctcggtgttg\n", - " 1861 agcccgctga tgttccacaa tacgaacaat tgcttgaatc ccatactaat 
cgaccaaatg\n", - " 1921 atgtttatgt tactcaagaa cttatcgacc aactttgcaa gcaaaaagaa gttttttccg\n", - " 1981 ctgaaaaaga aaagagaagt gatcatttaa aaagtataca atcagaagtt agcaacttgt\n", - " 2041 ggaataagct tcaagtttct cccaatgaac aaagtcaatt tggcgattca tcaaacatta\n", - " 2101 atcaagaaaa tatttcatta tgggaaactg aacttgaaaa acttcatcag ttaaaaaagg\n", - " 2161 agcatttacc cattttttta gaagactgtc gtcaacaaat tcttcagctt tgggattctc\n", - " 2221 tgttttattc agaagaacaa agaaagtcct ttacacctat gtatgaagac attattacag\n", - " 2281 agcaggttct tacggcccat gaaaactata taaagcaact agaggccgaa gtttctgcta\n", - " 2341 ataagtcctt tttaagctta attaatcgct atgcctcttt aatagaagga aagaaagagc\n", - " 2401 ttgaagctag ttctaatgat gcctctcgtc taacacaacg gggacgccgg gacccaggtt\n", - " 2461 tacttctacg tgaagagaaa atccgtaagc gactttctag agaacttcct aaggttcagt\n", - " 2521 cgctgcttat accagagatt acagcatggg aagaaagaaa tggaaggacg ttcctttttt\n", - " 2581 atgatgaacc acttctcaag atttgccaag aggccactca accaaaatca ttatatagaa\n", - " 2641 gtgcaagtgc tgccgcaaac cgcccgaaaa cagcaactac aacggactct gttaatagaa\n", - " 2701 caccttctca acgagggcgt gtagctgtac cttcaacacc aagtgttagg tccgcttctc\n", - " 2761 gagctatgac gagtccaagg acaccgcttc ctagagtaaa aaacactcaa aatccaagtc\n", - " 2821 gttccattag tgcagaaccg ccatcagcaa ccagtaccgc caatagaaga caccccactg\n", - " 2881 ctaatcgaat tgatataaac gctagattaa acagtgctag tcggtctcga agcgcgaaca\n", - " 2941 tgataagaca aggggcaaat ggtagtgaca gcaatatgtc ttcttcaccc gtttctggaa\n", - " 3001 attccaatac cccttttaac aagtttccaa attctgtatc tcgcaataca cattttgaat\n", - " 3061 ccaagtcacc gcacccaaat tactctcgaa ctcctcatga aacgtattca aaggcttcat\n", - " 3121 ctaagaacgt cccattaagt cctccaaagc agcgtgtagt taatgaacac gctttaaata\n", - " 3181 ttatgtcgga aaaattgcaa agaactaatc tgaaagaaca aacacccgag atggacattg\n", - " 3241 aaaacagctc gcagaacctt cctttttctc ctatgaagat atcccccata agagcatcac\n", - " 3301 ccgtaaagac aattccatca tcaccgtccc ccactaccaa cattttttct gctccactca\n", - " 3361 acaatattac aaattgtaca ccgatggagg atgaatgggg agaagaaggc ttttaagctt\n", - " 3421 
cttatttacc taatcgatca aatttaaata tacatatttt tgcatatgaa tacagcatat\n", - " 3481 agataattca taaaagttta ttaactgagg tcataattaa aagactattt acacctaaaa\n", - " 3541 aaaaacgtgt atcaatagag ggaaaagaga agaattaaga acagaaagta accatagttt\n", - " 3601 tgttaaaata gcaatgtaaa aaaatattat gaaaagaaaa cgtatagcac attttgaaat\n", - " 3661 gtaaaagaat ctgagagagc gtgtgaatat ctagcaatta caagaagatg tattattcaa\n", - " 3721 aggctttgaa agaagcaaag gttcagagaa gtcattaaca aagtcatctc tcgagctttc\n", - " 3781 attttctaaa gctaaacgac tgactgtttc gaaaaggtca gtaacgtttg tattttcttt\n", - " 3841 tgcactagct tcaaaatgaa tcatatttga tccatgtttg gatttgcaat agtcaagagc\n", - " 3901 tcgatgaaga gatacggctc gtttagacgc gtctttgtcg atttgatttc caacgataat\n", - " 3961 gaaagggaat gcacattcat cttgtgaagt ttgatataaa aattcttgcc tccagttttc\n", - " 4021 tactgagtca aaagacttcg agttattcac attataaaca attacacaac aatcggcccc\n", - " 4081 tctgtaaaaa gccattccca ggctttgaaa tcgttcttga ccagcagtat cccaaagctg\n", - " 4141 tttataatta gcaaacgaat ttagatgggc ggaacttata ttggaactta cctgtaatgt\n", - " 4201 gaccaatttg tcgtcaacca caacgtcctt ggttaaaaaa tcagcaccga tggtagcttt\n", - " 4261 atattcgcga ctaaactttt gattgacgaa ctaaaatgac gatgttaaca aattgccaaa\n", - " 4321 gcaatactca tagagaagct gatgtaaaga tcgttaacca tatttgagct agtatttaat\n", - " 4381 aacaaagtga ataaatttta aaagcaatca ccttgtagcg acaaataaca acttatcgac\n", - " 4441 ataaaatcaa tgggaaattg cagtattgga ttttacagct caatacaaaa accaaaaaga\n", - " 4501 aaaatatact gaacgtataa aatttaacgc ttcaattg\n", - "//\n" - ] - } - ], - "source": [ - "# Parsing the files\n", - "pFA6akanMX6_path = \"./pFA6a-kanMX6.gb\"\n", - "ase1_path = \"./CU329670.gb\"\n", - "vector = parse(pFA6akanMX6_path)[0]\n", - "pombe_chromosome_I = parse(ase1_path)[0]\n", - "\n", - "# Printing the parsed files\n", - "\n", - "print(vector.format(\"gb\"))\n", - "print(pombe_chromosome_I.format(\"gb\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"ACCATGTCGACATGCAAACAGTAATGATGGA , Tm: 57.24061148156318\n", - "GGCGCGCCATTAAAAGCCTTCTTCTCCC , Tm: 56.64459495003314\n" - ] - } - ], - "source": [ - "from pydna.design import primer_design\n", - "#Finding the feature containing the CDS with ase1 as a type qualifier\n", - "gene = next(f for f in pombe_chromosome_I.features if f.type == \"CDS\" and\n", - " \"gene\" in f.qualifiers and\n", - " \"ase1\" in f.qualifiers[\"gene\"])\n", - "\n", - "# Using the primer_design function to design primers to amplify the CDS\n", - "# `min` and `max` can be used on a SeqFeature to get the start (leftmost) and end (rightmost) positions\n", - "# this works both on feature with SimpleLocation and CompoundLocation\n", - "amplicon = primer_design(pombe_chromosome_I[min(gene):max(gene)], target_tm=55)\n", - "\n", - "fwd_align, rvs_align = amplicon.primers()\n", - "fwd_primer_ase1 = Dseqrecord(\"ACCATGTCGAC\") + fwd_align # Adding a SalI cut site\n", - "rvs_primer_ase1 = Dseqrecord(\"GGCGCGCCAT\") + rvs_align # Adding a AscI cut site\n", - "\n", - "# Printing out the primers\n", - "\n", - "print(fwd_primer_ase1.seq, ', Tm: ', tm_default(fwd_align))\n", - "print(rvs_primer_ase1.seq, ', Tm: ', tm_default(rvs_align))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS 2263bp_PCR_prod 2263 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_description_description.\n", - "ACCESSION 2263bp\n", - "VERSION 2263bp\n", - "DBLINK BioProject: PRJNA13836\n", - " BioSample: SAMEA3138176\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " CDS join(12..435,483..2254)\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " /codon_start=1\n", - " /product=\"antiparallel microtubule cross-linking factor\n", - " Ase1\"\n", - " /protein_id=\"CAC21482.1\"\n", - " 
/translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", - " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", - " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", - " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", - " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", - " NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", - " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", - " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", - " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", - " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", - " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", - " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", - " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", - " primer_bind 12..31\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer\n", - " sequence:ACCATGTCGACATGCAAACAGTAATGATGGA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(2236..2254)\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer\n", - " sequence:GGCGCGCCATTAAAAGCCTTCTTCTCCC\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 accatgtcga catgcaaaca gtaatgatgg atgacattca aagcactgat tctattgctg\n", - " 61 aaaaagataa tcactctaat aatgaatcta actttacttg gaaagcgttt cgtgaacaag\n", - " 121 tggaaaagca tttttctaaa attgaaaggc ttcaccaagt ccttggaaca gatggagaca\n", - " 181 attcatcatt atttgagttg tttacaacgg caatgaatgc ccagcttcat gaaatggaac\n", - " 241 agtgccagaa aaaacttgaa gatgactgtc agcaaagaat tgattcaatc agatttttgg\n", - " 301 tttcctcatt aaagttaacg gatgatactt ctagtctcaa aattgagtct cctttaattc\n", - " 361 agtgtttgaa tcgtttgtca atggtagaag gacaatatat ggcacagtat gatcaaaagt\n", - " 421 taagtacgat taaaggtatg taatcgtctt taatttagac ttgtgtttta actgatgtat\n", - " 481 agaaatgtat cacaaattgg agtcatattg taaccgctta ggaagtccgt 
tcgttttacc\n", - " 541 tgattttgag aattcatttt tatctgatgt atccgatgct tttactgaat ctttgagagg\n", - " 601 acgcatcaac gaagccgaaa aggagattga tgcgagatta gaggttatta attcctttga\n", - " 661 agaagaaatt ttgggtttgt ggtctgaact cggtgttgag cccgctgatg ttccacaata\n", - " 721 cgaacaattg cttgaatccc atactaatcg accaaatgat gtttatgtta ctcaagaact\n", - " 781 tatcgaccaa ctttgcaagc aaaaagaagt tttttccgct gaaaaagaaa agagaagtga\n", - " 841 tcatttaaaa agtatacaat cagaagttag caacttgtgg aataagcttc aagtttctcc\n", - " 901 caatgaacaa agtcaatttg gcgattcatc aaacattaat caagaaaata tttcattatg\n", - " 961 ggaaactgaa cttgaaaaac ttcatcagtt aaaaaaggag catttaccca tttttttaga\n", - " 1021 agactgtcgt caacaaattc ttcagctttg ggattctctg ttttattcag aagaacaaag\n", - " 1081 aaagtccttt acacctatgt atgaagacat tattacagag caggttctta cggcccatga\n", - " 1141 aaactatata aagcaactag aggccgaagt ttctgctaat aagtcctttt taagcttaat\n", - " 1201 taatcgctat gcctctttaa tagaaggaaa gaaagagctt gaagctagtt ctaatgatgc\n", - " 1261 ctctcgtcta acacaacggg gacgccggga cccaggttta cttctacgtg aagagaaaat\n", - " 1321 ccgtaagcga ctttctagag aacttcctaa ggttcagtcg ctgcttatac cagagattac\n", - " 1381 agcatgggaa gaaagaaatg gaaggacgtt ccttttttat gatgaaccac ttctcaagat\n", - " 1441 ttgccaagag gccactcaac caaaatcatt atatagaagt gcaagtgctg ccgcaaaccg\n", - " 1501 cccgaaaaca gcaactacaa cggactctgt taatagaaca ccttctcaac gagggcgtgt\n", - " 1561 agctgtacct tcaacaccaa gtgttaggtc cgcttctcga gctatgacga gtccaaggac\n", - " 1621 accgcttcct agagtaaaaa acactcaaaa tccaagtcgt tccattagtg cagaaccgcc\n", - " 1681 atcagcaacc agtaccgcca atagaagaca ccccactgct aatcgaattg atataaacgc\n", - " 1741 tagattaaac agtgctagtc ggtctcgaag cgcgaacatg ataagacaag gggcaaatgg\n", - " 1801 tagtgacagc aatatgtctt cttcacccgt ttctggaaat tccaataccc cttttaacaa\n", - " 1861 gtttccaaat tctgtatctc gcaatacaca ttttgaatcc aagtcaccgc acccaaatta\n", - " 1921 ctctcgaact cctcatgaaa cgtattcaaa ggcttcatct aagaacgtcc cattaagtcc\n", - " 1981 tccaaagcag cgtgtagtta atgaacacgc tttaaatatt atgtcggaaa aattgcaaag\n", - " 2041 
aactaatctg aaagaacaaa cacccgagat ggacattgaa aacagctcgc agaaccttcc\n", - " 2101 tttttctcct atgaagatat cccccataag agcatcaccc gtaaagacaa ttccatcatc\n", - " 2161 accgtccccc actaccaaca ttttttctgc tccactcaac aatattacaa attgtacacc\n", - " 2221 gatggaggat gaatggggag aagaaggctt ttaatggcgc gcc\n", - "//\n" - ] - } - ], - "source": [ - "# Performing a PCR to check that the primers are specific. An error message is returned if otherwise.\n", - "\n", - "pcr_product = pcr(fwd_primer_ase1, rvs_primer_ase1, pombe_chromosome_I)\n", - "\n", - "# Printing out the PCR results\n", - "\n", - "print(pcr_product.format(\"gb\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(Dseqrecord(-30), Dseqrecord(-3916))\n", - "(Dseqrecord(-10), Dseqrecord(-2255), Dseqrecord(-6))\n" - ] - } - ], - "source": [ - "# Cleaving the cloning vector with restriction enzymes\n", - "\n", - "plasmid_digests = vector.cut(SalI, AscI)\n", - "\n", - "# Cleaving the gene fragment with restriction enzymes\n", - "\n", - "gene_digests = Dseqrecord(pcr_product).cut(SalI, AscI)\n", - "\n", - "# Printing out the digests\n", - "print(plasmid_digests) \n", - "print(gene_digests)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS name 6163 bp DNA circular UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 52..1408\n", - " /label=\"kanMX\"\n", - " /note=\"yeast selectable marker conferring kanamycin\n", - " resistance (Wach et al., 1994)\"\n", - " promoter 52..395\n", - " /label=\"TEF promoter\"\n", - " /note=\"Ashbya gossypii TEF promoter\"\n", - " CDS 396..1205\n", - " /codon_start=1\n", - " /gene=\"aph(3')-Ia\"\n", - " /product=\"aminoglycoside 
phosphotransferase\"\n", - " /label=\"KanR\"\n", - " /note=\"confers resistance to kanamycin\"\n", - " /translation=\"MGKEKTHVSRPRLNSNMDADLYGYKWARDNVGQSGATIYRLYGKP\n", - " DAPELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTAIPGKTA\n", - " FQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLAQAQSRMNNGLVDASD\n", - " FDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFSLDNLIFDEGKLIGCIDVGRVGIAD\n", - " RYQDLAILWNCLGEFSPSLQKRLFQKYGIDNPDMNKLQFHLMLDEFF\"\n", - " primer_bind complement(463..482)\n", - " /label=\"Kan-R\"\n", - " /note=\"Kanamycin resistance gene, reverse primer\"\n", - " terminator 1211..1408\n", - " /label=\"TEF terminator\"\n", - " /note=\"Ashbya gossypii TEF terminator\"\n", - " primer_bind complement(1512..1531)\n", - " /label=\"T7\"\n", - " /note=\"T7 promoter, forward primer\"\n", - " promoter complement(1513..1531)\n", - " /label=\"T7 promoter\"\n", - " /note=\"promoter for bacteriophage T7 RNA polymerase\"\n", - " primer_bind complement(1618..1635)\n", - " /label=\"L4440\"\n", - " /note=\"L4440 vector, forward primer\"\n", - " rep_origin complement(1789..2377)\n", - " /direction=LEFT\n", - " /label=\"ori\"\n", - " /note=\"high-copy-number ColE1/pMB1/pBR322/pUC origin of\n", - " replication\"\n", - " primer_bind complement(1869..1888)\n", - " /label=\"pBR322ori-F\"\n", - " /note=\"pBR322 origin, forward primer\"\n", - " CDS complement(2548..3408)\n", - " /codon_start=1\n", - " /gene=\"bla\"\n", - " /product=\"beta-lactamase\"\n", - " /label=\"AmpR\"\n", - " /note=\"confers resistance to ampicillin, carbenicillin, and\n", - " related antibiotics\"\n", - " /translation=\"MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI\n", - " ELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS\n", - " PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRW\n", - " EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSA\n", - " LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS\n", - " LIKHW\"\n", - " primer_bind 3171..3190\n", - " /label=\"Amp-R\"\n", - " 
/note=\"Ampicillin resistance gene, reverse primer\"\n", - " promoter complement(3409..3513)\n", - " /gene=\"bla\"\n", - " /label=\"AmpR promoter\"\n", - " primer_bind 3581..3599\n", - " /label=\"pBRforEco\"\n", - " /note=\"pBR322 vectors, upsteam of EcoRI site, forward\n", - " primer\"\n", - " primer_bind complement(3637..3659)\n", - " /label=\"pGEX 3'\"\n", - " /note=\"pGEX vectors, reverse primer\"\n", - " primer_bind 3759..3778\n", - " /label=\"pRS-marker\"\n", - " /note=\"pRS vectors, use to sequence yeast selectable\n", - " marker\"\n", - " promoter 3859..3877\n", - " /label=\"SP6 promoter\"\n", - " /note=\"promoter for bacteriophage SP6 RNA polymerase\"\n", - " primer_bind 3859..3876\n", - " /label=\"SP6\"\n", - " /note=\"SP6 promoter, forward primer\"\n", - " CDS join(3918..4341,4389..6160)\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " /codon_start=1\n", - " /product=\"antiparallel microtubule cross-linking factor\n", - " Ase1\"\n", - " /protein_id=\"CAC21482.1\"\n", - " /translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", - " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", - " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", - " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", - " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", - " NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", - " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", - " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", - " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", - " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", - " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", - " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", - " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", - " primer_bind 3918..3937\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer\n", - " 
sequence:ACCATGTCGACATGCAAACAGTAATGATGGA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(6142..6160)\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer\n", - " sequence:GGCGCGCCATTAAAAGCCTTCTTCTCCC\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 cgcgccagat ctgtttagct tgcctcgtcc ccgccgggtc acccggccag cgacatggag\n", - " 61 gcccagaata ccctccttga cagtcttgac gtgcgcagct caggggcatg atgtgactgt\n", - " 121 cgcccgtaca tttagcccat acatccccat gtataatcat ttgcatccat acattttgat\n", - " 181 ggccgcacgg cgcgaagcaa aaattacggc tcctcgctgc agacctgcga gcagggaaac\n", - " 241 gctcccctca cagacgcgtt gaattgtccc cacgccgcgc ccctgtagag aaatataaaa\n", - " 301 ggttaggatt tgccactgag gttcttcttt catatacttc cttttaaaat cttgctagga\n", - " 361 tacagttctc acatcacatc cgaacataaa caaccatggg taaggaaaag actcacgttt\n", - " 421 cgaggccgcg attaaattcc aacatggatg ctgatttata tgggtataaa tgggctcgcg\n", - " 481 ataatgtcgg gcaatcaggt gcgacaatct atcgattgta tgggaagccc gatgcgccag\n", - " 541 agttgtttct gaaacatggc aaaggtagcg ttgccaatga tgttacagat gagatggtca\n", - " 601 gactaaactg gctgacggaa tttatgcctc ttccgaccat caagcatttt atccgtactc\n", - " 661 ctgatgatgc atggttactc accactgcga tccccggcaa aacagcattc caggtattag\n", - " 721 aagaatatcc tgattcaggt gaaaatattg ttgatgcgct ggcagtgttc ctgcgccggt\n", - " 781 tgcattcgat tcctgtttgt aattgtcctt ttaacagcga tcgcgtattt cgtctcgctc\n", - " 841 aggcgcaatc acgaatgaat aacggtttgg ttgatgcgag tgattttgat gacgagcgta\n", - " 901 atggctggcc tgttgaacaa gtctggaaag aaatgcataa gcttttgcca ttctcaccgg\n", - " 961 attcagtcgt cactcatggt gatttctcac ttgataacct tatttttgac gaggggaaat\n", - " 1021 taataggttg tattgatgtt ggacgagtcg gaatcgcaga ccgataccag gatcttgcca\n", - " 1081 tcctatggaa ctgcctcggt gagttttctc cttcattaca gaaacggctt tttcaaaaat\n", - " 1141 atggtattga taatcctgat atgaataaat tgcagtttca tttgatgctc gatgagtttt\n", - " 1201 tctaatcagt actgacaata aaaagattct tgttttcaag aacttgtcat ttgtatagtt\n", 
- " 1261 tttttatatt gtagttgttc tattttaatc aaatgttagc gtgatttata ttttttttcg\n", - " 1321 cctcgacatc atctgcccag atgcgaagtt aagtgcgcag aaagtaatat catgcgtcaa\n", - " 1381 tcgtatgtga atgctggtcg ctatactgct gtcgattcga tactaacgcc gccatccagt\n", - " 1441 ttaaacgagc tcgaattcat cgatgatatc agatccacta gtggcctatg cggccgcgga\n", - " 1501 tctgccggtc tccctatagt gagtcgtatt aatttcgata agccaggtta acctgcatta\n", - " 1561 atgaatcggc caacgcgcgg ggagaggcgg tttgcgtatt gggcgctctt ccgcttcctc\n", - " 1621 gctcactgac tcgctgcgct cggtcgttcg gctgcggcga gcggtatcag ctcactcaaa\n", - " 1681 ggcggtaata cggttatcca cagaatcagg ggataacgca ggaaagaaca tgtgagcaaa\n", - " 1741 aggccagcaa aaggccagga accgtaaaaa ggccgcgttg ctggcgtttt tccataggct\n", - " 1801 ccgcccccct gacgagcatc acaaaaatcg acgctcaagt cagaggtggc gaaacccgac\n", - " 1861 aggactataa agataccagg cgtttccccc tggaagctcc ctcgtgcgct ctcctgttcc\n", - " 1921 gaccctgccg cttaccggat acctgtccgc ctttctccct tcgggaagcg tggcgctttc\n", - " 1981 tcatagctca cgctgtaggt atctcagttc ggtgtaggtc gttcgctcca agctgggctg\n", - " 2041 tgtgcacgaa ccccccgttc agcccgaccg ctgcgcctta tccggtaact atcgtcttga\n", - " 2101 gtccaacccg gtaagacacg acttatcgcc actggcagca gccactggta acaggattag\n", - " 2161 cagagcgagg tatgtaggcg gtgctacaga gttcttgaag tggtggccta actacggcta\n", - " 2221 cactagaaga acagtatttg gtatctgcgc tctgctgaag ccagttacct tcggaaaaag\n", - " 2281 agttggtagc tcttgatccg gcaaacaaac caccgctggt agcggtggtt tttttgtttg\n", - " 2341 caagcagcag attacgcgca gaaaaaaagg atctcaagaa gatcctttga tcttttctac\n", - " 2401 ggggtctgac gctcagtgga acgaaaactc acgttaaggg attttggtca tgagattatc\n", - " 2461 aaaaaggatc ttcacctaga tccttttaaa ttaaaaatga agttttaaat caatctaaag\n", - " 2521 tatatatgag taaacttggt ctgacagtta ccaatgctta atcagtgagg cacctatctc\n", - " 2581 agcgatctgt ctatttcgtt catccatagt tgcctgactc cccgtcgtgt agataactac\n", - " 2641 gatacgggag ggcttaccat ctggccccag tgctgcaatg ataccgcgag acccacgctc\n", - " 2701 accggctcca gatttatcag caataaacca gccagccgga agggccgagc gcagaagtgg\n", - " 2761 tcctgcaact 
ttatccgcct ccatccagtc tattaattgt tgccgggaag ctagagtaag\n", - " 2821 tagttcgcca gttaatagtt tgcgcaacgt tgttgccatt gctacaggca tcgtggtgtc\n", - " 2881 acgctcgtcg tttggtatgg cttcattcag ctccggttcc caacgatcaa ggcgagttac\n", - " 2941 atgatccccc atgttgtgca aaaaagcggt tagctccttc ggtcctccga tcgttgtcag\n", - " 3001 aagtaagttg gccgcagtgt tatcactcat ggttatggca gcactgcata attctcttac\n", - " 3061 tgtcatgcca tccgtaagat gcttttctgt gactggtgag tactcaacca agtcattctg\n", - " 3121 agaatagtgt atgcggcgac cgagttgctc ttgcccggcg tcaatacggg ataataccgc\n", - " 3181 gccacatagc agaactttaa aagtgctcat cattggaaaa cgttcttcgg ggcgaaaact\n", - " 3241 ctcaaggatc ttaccgctgt tgagatccag ttcgatgtaa cccactcgtg cacccaactg\n", - " 3301 atcttcagca tcttttactt tcaccagcgt ttctgggtga gcaaaaacag gaaggcaaaa\n", - " 3361 tgccgcaaaa aagggaataa gggcgacacg gaaatgttga atactcatac tcttcctttt\n", - " 3421 tcaatattat tgaagcattt atcagggtta ttgtctcatg agcggataca tatttgaatg\n", - " 3481 tatttagaaa aataaacaaa taggggttcc gcgcacattt ccccgaaaag tgccacctga\n", - " 3541 cgtctaagaa accattatta tcatgacatt aacctataaa aataggcgta tcacgaggcc\n", - " 3601 ctttcgtctc gcgcgtttcg gtgatgacgg tgaaaacctc tgacacatgc agctcccgga\n", - " 3661 gacggtcaca gcttgtctgt aagcggatgc cgggagcaga caagcccgtc agggcgcgtc\n", - " 3721 agcgggtgtt ggcgggtgtc ggggctggct taactatgcg gcatcagagc agattgtact\n", - " 3781 gagagtgcac catatggaca tattgtcgtt agaacgcggc tacaattaat acataacctt\n", - " 3841 atgtatcata cacatacgat ttaggtgaca ctatagaacg cggccgccag ctgaagcttc\n", - " 3901 gtacgctgca ggtcgacatg caaacagtaa tgatggatga cattcaaagc actgattcta\n", - " 3961 ttgctgaaaa agataatcac tctaataatg aatctaactt tacttggaaa gcgtttcgtg\n", - " 4021 aacaagtgga aaagcatttt tctaaaattg aaaggcttca ccaagtcctt ggaacagatg\n", - " 4081 gagacaattc atcattattt gagttgttta caacggcaat gaatgcccag cttcatgaaa\n", - " 4141 tggaacagtg ccagaaaaaa cttgaagatg actgtcagca aagaattgat tcaatcagat\n", - " 4201 ttttggtttc ctcattaaag ttaacggatg atacttctag tctcaaaatt gagtctcctt\n", - " 4261 taattcagtg tttgaatcgt ttgtcaatgg 
tagaaggaca atatatggca cagtatgatc\n", - " 4321 aaaagttaag tacgattaaa ggtatgtaat cgtctttaat ttagacttgt gttttaactg\n", - " 4381 atgtatagaa atgtatcaca aattggagtc atattgtaac cgcttaggaa gtccgttcgt\n", - " 4441 tttacctgat tttgagaatt catttttatc tgatgtatcc gatgctttta ctgaatcttt\n", - " 4501 gagaggacgc atcaacgaag ccgaaaagga gattgatgcg agattagagg ttattaattc\n", - " 4561 ctttgaagaa gaaattttgg gtttgtggtc tgaactcggt gttgagcccg ctgatgttcc\n", - " 4621 acaatacgaa caattgcttg aatcccatac taatcgacca aatgatgttt atgttactca\n", - " 4681 agaacttatc gaccaacttt gcaagcaaaa agaagttttt tccgctgaaa aagaaaagag\n", - " 4741 aagtgatcat ttaaaaagta tacaatcaga agttagcaac ttgtggaata agcttcaagt\n", - " 4801 ttctcccaat gaacaaagtc aatttggcga ttcatcaaac attaatcaag aaaatatttc\n", - " 4861 attatgggaa actgaacttg aaaaacttca tcagttaaaa aaggagcatt tacccatttt\n", - " 4921 tttagaagac tgtcgtcaac aaattcttca gctttgggat tctctgtttt attcagaaga\n", - " 4981 acaaagaaag tcctttacac ctatgtatga agacattatt acagagcagg ttcttacggc\n", - " 5041 ccatgaaaac tatataaagc aactagaggc cgaagtttct gctaataagt cctttttaag\n", - " 5101 cttaattaat cgctatgcct ctttaataga aggaaagaaa gagcttgaag ctagttctaa\n", - " 5161 tgatgcctct cgtctaacac aacggggacg ccgggaccca ggtttacttc tacgtgaaga\n", - " 5221 gaaaatccgt aagcgacttt ctagagaact tcctaaggtt cagtcgctgc ttataccaga\n", - " 5281 gattacagca tgggaagaaa gaaatggaag gacgttcctt ttttatgatg aaccacttct\n", - " 5341 caagatttgc caagaggcca ctcaaccaaa atcattatat agaagtgcaa gtgctgccgc\n", - " 5401 aaaccgcccg aaaacagcaa ctacaacgga ctctgttaat agaacacctt ctcaacgagg\n", - " 5461 gcgtgtagct gtaccttcaa caccaagtgt taggtccgct tctcgagcta tgacgagtcc\n", - " 5521 aaggacaccg cttcctagag taaaaaacac tcaaaatcca agtcgttcca ttagtgcaga\n", - " 5581 accgccatca gcaaccagta ccgccaatag aagacacccc actgctaatc gaattgatat\n", - " 5641 aaacgctaga ttaaacagtg ctagtcggtc tcgaagcgcg aacatgataa gacaaggggc\n", - " 5701 aaatggtagt gacagcaata tgtcttcttc acccgtttct ggaaattcca ataccccttt\n", - " 5761 taacaagttt ccaaattctg tatctcgcaa tacacatttt gaatccaagt 
caccgcaccc\n", - " 5821 aaattactct cgaactcctc atgaaacgta ttcaaaggct tcatctaaga acgtcccatt\n", - " 5881 aagtcctcca aagcagcgtg tagttaatga acacgcttta aatattatgt cggaaaaatt\n", - " 5941 gcaaagaact aatctgaaag aacaaacacc cgagatggac attgaaaaca gctcgcagaa\n", - " 6001 ccttcctttt tctcctatga agatatcccc cataagagca tcacccgtaa agacaattcc\n", - " 6061 atcatcaccg tcccccacta ccaacatttt ttctgctcca ctcaacaata ttacaaattg\n", - " 6121 tacaccgatg gaggatgaat ggggagaaga aggcttttaa tgg\n", - "//\n" - ] - } - ], - "source": [ - "# Ligating, then circularising the synthetic plasmid\n", - "\n", - "synthetic_vector = plasmid_digests[1] + gene_digests [1]\n", - "synthetic_vector = synthetic_vector.looped()\n", - "\n", - "# Printing out the completed cloning vector\n", - "\n", - "print(synthetic_vector.format(\"gb\"))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example of a Plasmid Restriction/Ligation Cloning\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "This example showcases a workflow of modelling molecular cloning with restriction enzymes, PCR, and ligases, to clone gene fragments into plasmids. This example constructs a synthetic plasmid by cloning the ase1 gene, which encodes a microtubule associated protein responsible for mitotic spindle assembly, into the pFA6a-kanMX6 cloning vector:\n", + "\n", + "1. The ase1 gene fragment is first cloned from a portion of the _S. pombe_ genome through PCR:\n", + "2. The pFA6a-kanMX6 cloning vector is then cleaved with AscI and SalI. 
The ase1 gene fragment is also cleaved with SalI and AscI\n", + "3. The fragment is ligated with the linearized pFA6a-kanMX6 vector.\n", + "\n", + "Source files can be found alongside this notebook, if you would like to follow along. Annotations are made alongside the code to describe key steps." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing all necessary classes and methods\n", + "\n", + "from pydna.parsers import parse\n", + "from pydna.tm import tm_default\n", + "from pydna.amplify import pcr\n", + "from pydna.dseqrecord import Dseqrecord\n", + "from Bio.Restriction import SalI, AscI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS pFA6a-kanMX6 3938 bp ds-DNA circular SYN 16-JUN-2022\n", + "DEFINITION synthetic circular DNA.\n", + "ACCESSION .\n", + "VERSION .\n", + "KEYWORDS pFA6a-kanMX6.\n", + "SOURCE synthetic DNA construct\n", + " ORGANISM synthetic DNA construct\n", + " .\n", + "REFERENCE 1 (bases 1 to 3938)\n", + " AUTHORS Bahler J, Wu JQ, Longtine MS, Shah NG, McKenzie A 3rd, Steever AB,\n", + " Wach A, Philippsen P, Pringle JR\n", + " TITLE Heterologous modules for efficient and versatile PCR-based gene\n", + " targeting in Schizosaccharomyces pombe.\n", + " JOURNAL 
Yeast. 1998 Jul;14(10):943-51.\n", + " PUBMED 9717240\n", + "REFERENCE 2 (bases 1 to 3938)\n", + " AUTHORS .\n", + " TITLE Direct Submission\n", + " JOURNAL Exported Jun 16, 2022 from SnapGene Server 1.1.58\n", + " http://www.snapgene.com\n", + "FEATURES Location/Qualifiers\n", + " source 1..3938\n", + " /organism=\"synthetic DNA construct\"\n", + " /mol_type=\"other DNA\"\n", + " primer_bind complement(35..52)\n", + " /label=\"L4440\"\n", + " /note=\"L4440 vector, forward primer\"\n", + " rep_origin complement(206..794)\n", + " /direction=LEFT\n", + " /label=\"ori\"\n", + " /note=\"high-copy-number ColE1/pMB1/pBR322/pUC origin of\n", + " replication\"\n", + " primer_bind complement(286..305)\n", + " /label=\"pBR322ori-F\"\n", + " /note=\"pBR322 origin, forward primer\"\n", + " CDS complement(965..1825)\n", + " /codon_start=1\n", + " /gene=\"bla\"\n", + " /product=\"beta-lactamase\"\n", + " /label=\"AmpR\"\n", + " /note=\"confers resistance to ampicillin, carbenicillin, and\n", + " related antibiotics\"\n", + " /translation=\"MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI\n", + " ELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS\n", + " PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRW\n", + " EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSA\n", + " LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS\n", + " LIKHW\"\n", + " primer_bind 1588..1607\n", + " /label=\"Amp-R\"\n", + " /note=\"Ampicillin resistance gene, reverse primer\"\n", + " promoter complement(1826..1930)\n", + " /gene=\"bla\"\n", + " /label=\"AmpR promoter\"\n", + " primer_bind 1998..2016\n", + " /label=\"pBRforEco\"\n", + " /note=\"pBR322 vectors, upsteam of EcoRI site, forward\n", + " primer\"\n", + " primer_bind complement(2054..2076)\n", + " /label=\"pGEX 3'\"\n", + " /note=\"pGEX vectors, reverse primer\"\n", + " primer_bind 2176..2195\n", + " /label=\"pRS-marker\"\n", + " /note=\"pRS vectors, use to sequence yeast selectable\n", + " marker\"\n", + " 
promoter 2276..2294\n", + " /label=\"SP6 promoter\"\n", + " /note=\"promoter for bacteriophage SP6 RNA polymerase\"\n", + " primer_bind 2276..2293\n", + " /label=\"SP6\"\n", + " /note=\"SP6 promoter, forward primer\"\n", + " gene 2407..3763\n", + " /label=\"kanMX\"\n", + " /note=\"yeast selectable marker conferring kanamycin\n", + " resistance (Wach et al., 1994)\"\n", + " promoter 2407..2750\n", + " /label=\"TEF promoter\"\n", + " /note=\"Ashbya gossypii TEF promoter\"\n", + " CDS 2751..3560\n", + " /codon_start=1\n", + " /gene=\"aph(3')-Ia\"\n", + " /product=\"aminoglycoside phosphotransferase\"\n", + " /label=\"KanR\"\n", + " /note=\"confers resistance to kanamycin\"\n", + " /translation=\"MGKEKTHVSRPRLNSNMDADLYGYKWARDNVGQSGATIYRLYGKP\n", + " DAPELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTAIPGKTA\n", + " FQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLAQAQSRMNNGLVDASD\n", + " FDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFSLDNLIFDEGKLIGCIDVGRVGIAD\n", + " RYQDLAILWNCLGEFSPSLQKRLFQKYGIDNPDMNKLQFHLMLDEFF\"\n", + " primer_bind complement(2818..2837)\n", + " /label=\"Kan-R\"\n", + " /note=\"Kanamycin resistance gene, reverse primer\"\n", + " terminator 3566..3763\n", + " /label=\"TEF terminator\"\n", + " /note=\"Ashbya gossypii TEF terminator\"\n", + " primer_bind complement(3867..3886)\n", + " /label=\"T7\"\n", + " /note=\"T7 promoter, forward primer\"\n", + " promoter complement(3868..3886)\n", + " /label=\"T7 promoter\"\n", + " /note=\"promoter for bacteriophage T7 RNA polymerase\"\n", + "ORIGIN\n", + " 1 gaggcggttt gcgtattggg cgctcttccg cttcctcgct cactgactcg ctgcgctcgg\n", + " 61 tcgttcggct gcggcgagcg gtatcagctc actcaaaggc ggtaatacgg ttatccacag\n", + " 121 aatcagggga taacgcagga aagaacatgt gagcaaaagg ccagcaaaag gccaggaacc\n", + " 181 gtaaaaaggc cgcgttgctg gcgtttttcc ataggctccg cccccctgac gagcatcaca\n", + " 241 aaaatcgacg ctcaagtcag aggtggcgaa acccgacagg actataaaga taccaggcgt\n", + " 301 ttccccctgg aagctccctc gtgcgctctc ctgttccgac cctgccgctt accggatacc\n", + " 361 tgtccgcctt 
tctcccttcg ggaagcgtgg cgctttctca tagctcacgc tgtaggtatc\n", + " 421 tcagttcggt gtaggtcgtt cgctccaagc tgggctgtgt gcacgaaccc cccgttcagc\n", + " 481 ccgaccgctg cgccttatcc ggtaactatc gtcttgagtc caacccggta agacacgact\n", + " 541 tatcgccact ggcagcagcc actggtaaca ggattagcag agcgaggtat gtaggcggtg\n", + " 601 ctacagagtt cttgaagtgg tggcctaact acggctacac tagaagaaca gtatttggta\n", + " 661 tctgcgctct gctgaagcca gttaccttcg gaaaaagagt tggtagctct tgatccggca\n", + " 721 aacaaaccac cgctggtagc ggtggttttt ttgtttgcaa gcagcagatt acgcgcagaa\n", + " 781 aaaaaggatc tcaagaagat cctttgatct tttctacggg gtctgacgct cagtggaacg\n", + " 841 aaaactcacg ttaagggatt ttggtcatga gattatcaaa aaggatcttc acctagatcc\n", + " 901 ttttaaatta aaaatgaagt tttaaatcaa tctaaagtat atatgagtaa acttggtctg\n", + " 961 acagttacca atgcttaatc agtgaggcac ctatctcagc gatctgtcta tttcgttcat\n", + " 1021 ccatagttgc ctgactcccc gtcgtgtaga taactacgat acgggagggc ttaccatctg\n", + " 1081 gccccagtgc tgcaatgata ccgcgagacc cacgctcacc ggctccagat ttatcagcaa\n", + " 1141 taaaccagcc agccggaagg gccgagcgca gaagtggtcc tgcaacttta tccgcctcca\n", + " 1201 tccagtctat taattgttgc cgggaagcta gagtaagtag ttcgccagtt aatagtttgc\n", + " 1261 gcaacgttgt tgccattgct acaggcatcg tggtgtcacg ctcgtcgttt ggtatggctt\n", + " 1321 cattcagctc cggttcccaa cgatcaaggc gagttacatg atcccccatg ttgtgcaaaa\n", + " 1381 aagcggttag ctccttcggt cctccgatcg ttgtcagaag taagttggcc gcagtgttat\n", + " 1441 cactcatggt tatggcagca ctgcataatt ctcttactgt catgccatcc gtaagatgct\n", + " 1501 tttctgtgac tggtgagtac tcaaccaagt cattctgaga atagtgtatg cggcgaccga\n", + " 1561 gttgctcttg cccggcgtca atacgggata ataccgcgcc acatagcaga actttaaaag\n", + " 1621 tgctcatcat tggaaaacgt tcttcggggc gaaaactctc aaggatctta ccgctgttga\n", + " 1681 gatccagttc gatgtaaccc actcgtgcac ccaactgatc ttcagcatct tttactttca\n", + " 1741 ccagcgtttc tgggtgagca aaaacaggaa ggcaaaatgc cgcaaaaaag ggaataaggg\n", + " 1801 cgacacggaa atgttgaata ctcatactct tcctttttca atattattga agcatttatc\n", + " 1861 agggttattg tctcatgagc ggatacatat ttgaatgtat 
ttagaaaaat aaacaaatag\n", + " 1921 gggttccgcg cacatttccc cgaaaagtgc cacctgacgt ctaagaaacc attattatca\n", + " 1981 tgacattaac ctataaaaat aggcgtatca cgaggccctt tcgtctcgcg cgtttcggtg\n", + " 2041 atgacggtga aaacctctga cacatgcagc tcccggagac ggtcacagct tgtctgtaag\n", + " 2101 cggatgccgg gagcagacaa gcccgtcagg gcgcgtcagc gggtgttggc gggtgtcggg\n", + " 2161 gctggcttaa ctatgcggca tcagagcaga ttgtactgag agtgcaccat atggacatat\n", + " 2221 tgtcgttaga acgcggctac aattaataca taaccttatg tatcatacac atacgattta\n", + " 2281 ggtgacacta tagaacgcgg ccgccagctg aagcttcgta cgctgcaggt cgacggatcc\n", + " 2341 ccgggttaat taaggcgcgc cagatctgtt tagcttgcct cgtccccgcc gggtcacccg\n", + " 2401 gccagcgaca tggaggccca gaataccctc cttgacagtc ttgacgtgcg cagctcaggg\n", + " 2461 gcatgatgtg actgtcgccc gtacatttag cccatacatc cccatgtata atcatttgca\n", + " 2521 tccatacatt ttgatggccg cacggcgcga agcaaaaatt acggctcctc gctgcagacc\n", + " 2581 tgcgagcagg gaaacgctcc cctcacagac gcgttgaatt gtccccacgc cgcgcccctg\n", + " 2641 tagagaaata taaaaggtta ggatttgcca ctgaggttct tctttcatat acttcctttt\n", + " 2701 aaaatcttgc taggatacag ttctcacatc acatccgaac ataaacaacc atgggtaagg\n", + " 2761 aaaagactca cgtttcgagg ccgcgattaa attccaacat ggatgctgat ttatatgggt\n", + " 2821 ataaatgggc tcgcgataat gtcgggcaat caggtgcgac aatctatcga ttgtatggga\n", + " 2881 agcccgatgc gccagagttg tttctgaaac atggcaaagg tagcgttgcc aatgatgtta\n", + " 2941 cagatgagat ggtcagacta aactggctga cggaatttat gcctcttccg accatcaagc\n", + " 3001 attttatccg tactcctgat gatgcatggt tactcaccac tgcgatcccc ggcaaaacag\n", + " 3061 cattccaggt attagaagaa tatcctgatt caggtgaaaa tattgttgat gcgctggcag\n", + " 3121 tgttcctgcg ccggttgcat tcgattcctg tttgtaattg tccttttaac agcgatcgcg\n", + " 3181 tatttcgtct cgctcaggcg caatcacgaa tgaataacgg tttggttgat gcgagtgatt\n", + " 3241 ttgatgacga gcgtaatggc tggcctgttg aacaagtctg gaaagaaatg cataagcttt\n", + " 3301 tgccattctc accggattca gtcgtcactc atggtgattt ctcacttgat aaccttattt\n", + " 3361 ttgacgaggg gaaattaata ggttgtattg atgttggacg agtcggaatc 
gcagaccgat\n", + " 3421 accaggatct tgccatccta tggaactgcc tcggtgagtt ttctccttca ttacagaaac\n", + " 3481 ggctttttca aaaatatggt attgataatc ctgatatgaa taaattgcag tttcatttga\n", + " 3541 tgctcgatga gtttttctaa tcagtactga caataaaaag attcttgttt tcaagaactt\n", + " 3601 gtcatttgta tagttttttt atattgtagt tgttctattt taatcaaatg ttagcgtgat\n", + " 3661 ttatattttt tttcgcctcg acatcatctg cccagatgcg aagttaagtg cgcagaaagt\n", + " 3721 aatatcatgc gtcaatcgta tgtgaatgct ggtcgctata ctgctgtcga ttcgatacta\n", + " 3781 acgccgccat ccagtttaaa cgagctcgaa ttcatcgatg atatcagatc cactagtggc\n", + " 3841 ctatgcggcc gcggatctgc cggtctccct atagtgagtc gtattaattt cgataagcca\n", + " 3901 ggttaacctg cattaatgaa tcggccaacg cgcgggga\n", + "//\n", + "LOCUS CU329670 4538 bp DNA linear PLN 26-APR-2024\n", + "DEFINITION Schizosaccharomyces pombe strain 972h- genome assembly, chromosome:\n", + " I.\n", + "ACCESSION CU329670\n", + "VERSION CU329670.1\n", + "DBLINK BioProject: PRJNA13836\n", + " BioSample: SAMEA3138176\n", + "KEYWORDS .\n", + "SOURCE Schizosaccharomyces pombe (fission yeast)\n", + " ORGANISM Schizosaccharomyces pombe\n", + " Eukaryota; Fungi; Dikarya; Ascomycota; Taphrinomycotina;\n", + " Schizosaccharomycetes; Schizosaccharomycetales;\n", + " Schizosaccharomycetaceae; Schizosaccharomyces.\n", + "REFERENCE 1 (bases 1 to 4538)\n", + " AUTHORS Lang,B.F.\n", + " TITLE The mitochondrial genome of the fission yeast Schizosaccharomyces\n", + " pombe: highly homologous introns are inserted at the same position\n", + " of the otherwise less conserved cox1 genes in Schizosaccharomyces\n", + " pombe and Aspergillus nidulans\n", + " JOURNAL EMBO J 3 (9), 2129-2136 (1984)\n", + " PUBMED 6092057\n", + "REFERENCE 2 (bases 1 to 4538)\n", + " AUTHORS Lang,B.F., Ahne,F. and Bonen,L.\n", + " TITLE The mitochondrial genome of the fission yeast Schizosaccharomyces\n", + " pombe. 
The cytochrome b gene has an intron closely related to the\n", + " first two introns in the Saccharomyces cerevisiae cox1 gene\n", + " JOURNAL J Mol Biol 184 (3), 353-366 (1985)\n", + " PUBMED 4046021\n", + "REFERENCE 3 (bases 1 to 4538)\n", + " AUTHORS Lang,B.F., Cedergren,R. and Gray,M.W.\n", + " TITLE The mitochondrial genome of the fission yeast, Schizosaccharomyces\n", + " pombe. Sequence of the large-subunit ribosomal RNA gene, comparison\n", + " of potential secondary structure in fungal mitochondrial\n", + " large-subunit rRNAs and evolutionary considerations\n", + " JOURNAL Eur J Biochem 169 (3), 527-537 (1987)\n", + " PUBMED 2446871\n", + "REFERENCE 4 (bases 1 to 4538)\n", + " AUTHORS Trinkl,H., Lang,B.F. and Wolf,K.\n", + " TITLE Nucleotide sequence of the gene encoding the small ribosomal RNA in\n", + " the mitochondrial genome of the fission yeast Schizosaccharomyces\n", + " pombe\n", + " JOURNAL Nucleic Acids Res 17 (16), 6730 (1989)\n", + " PUBMED 2780299\n", + "REFERENCE 5 (bases 1 to 4538)\n", + " AUTHORS Wood,V., Gwilliam,R., Rajandream,M.A., Lyne,M., Lyne,R., Stewart,A.,\n", + " Sgouros,J., Peat,N., Hayles,J., Baker,S., Basham,D., Bowman,S.,\n", + " Brooks,K., Brown,D., Brown,S., Chillingworth,T., Churcher,C.,\n", + " Collins,M., Connor,R., Cronin,A., Davis,P., Feltwell,T., Fraser,A.,\n", + " Gentles,S., Goble,A., Hamlin,N., Harris,D., Hidalgo,J., Hodgson,G.,\n", + " Holroyd,S., Hornsby,T., Howarth,S., Huckle,E.J., Hunt,S., Jagels,K.,\n", + " James,K., Jones,L., Jones,M., Leather,S., McDonald,S., McLean,J.,\n", + " Mooney,P., Moule,S., Mungall,K., Murphy,L., Niblett,D., Odell,C.,\n", + " Oliver,K., O'Neil,S., Pearson,D., Quail,M.A., Rabbinowitsch,E.,\n", + " Rutherford,K., Rutter,S., Saunders,D., Seeger,K., Sharp,S.,\n", + " Skelton,J., Simmonds,M., Squares,R., Squares,S., Stevens,K.,\n", + " Taylor,K., Taylor,R.G., Tivey,A., Walsh,S., Warren,T., Whitehead,S.,\n", + " Woodward,J., Volckaert,G., Aert,R., Robben,J., Grymonprez,B.,\n", + " 
Weltjens,I., Vanstreels,E., Rieger,M., Schafer,M., Muller-Auer,S.,\n", + " Gabel,C., Fuchs,M., Dusterhoft,A., Fritzc,C., Holzer,E., Moestl,D.,\n", + " Hilbert,H., Borzym,K., Langer,I., Beck,A., Lehrach,H., Reinhardt,R.,\n", + " Pohl,T.M., Eger,P., Zimmermann,W., Wedler,H., Wambutt,R.,\n", + " Purnelle,B., Goffeau,A., Cadieu,E., Dreano,S., Gloux,S., Lelaure,V.,\n", + " Mottier,S., Galibert,F., Aves,S.J., Xiang,Z., Hunt,C., Moore,K.,\n", + " Hurst,S.M., Lucas,M., Rochet,M., Gaillardin,C., Tallada,V.A.,\n", + " Garzon,A., Thode,G., Daga,R.R., Cruzado,L., Jimenez,J., Sanchez,M.,\n", + " del Rey,F., Benito,J., Dominguez,A., Revuelta,J.L., Moreno,S.,\n", + " Armstrong,J., Forsburg,S.L., Cerutti,L., Lowe,T., McCombie,W.R.,\n", + " Paulsen,I., Potashkin,J., Shpakovski,G.V., Ussery,D., Barrell,B.G.\n", + " and Nurse,P.\n", + " TITLE The genome sequence of Schizosaccharomyces pombe\n", + " JOURNAL Nature 415 (6874), 871-880 (2002)\n", + " PUBMED 11859360\n", + " REMARK Erratum:[Nature 2003 Jan 2;421(6918):94. 
Cerrutti L [corrected to\n", + " Cerutti L]]\n", + "REFERENCE 6\n", + " AUTHORS Wood,V., Gwilliam,R., Rajandream,M.A., Lyne,M., Lyne,R., Stewart,A.,\n", + " Sgouros,J., Peat,N., Hayles,J., Baker,S., Basham,D., Bowman,S.,\n", + " Brooks,K., Brown,D., Brown,S., Chillingworth,T., Churcher,C.,\n", + " Collins,M., Connor,R., Cronin,A., Davis,P., Feltwell,T., Fraser,A.,\n", + " Gentles,S., Goble,A., Hamlin,N., Harris,D., Hidalgo,J., Hodgson,G.,\n", + " Holroyd,S., Hornsby,T., Howarth,S., Huckle,E.J., Hunt,S., Jagels,K.,\n", + " James,K., Jones,L., Jones,M., Leather,S., McDonald,S., McLean,J.,\n", + " Mooney,P., Moule,S., Mungall,K., Murphy,L., Niblett,D., Odell,C.,\n", + " Oliver,K., O'Neil,S., Pearson,D., Quail,M.A., Rabbinowitsch,E.,\n", + " Rutherford,K., Rutter,S., Saunders,D., Seeger,K., Sharp,S.,\n", + " Skelton,J., Simmonds,M., Squares,R., Squares,S., Stevens,K.,\n", + " Taylor,K., Taylor,R.G., Tivey,A., Walsh,S., Warren,T., Whitehead,S.,\n", + " Woodward,J., Volckaert,G., Aert,R., Robben,J., Grymonprez,B.,\n", + " Weltjens,I., Vanstreels,E., Rieger,M., Schafer,M., Muller-Auer,S.,\n", + " Gabel,C., Fuchs,M., Dusterhoft,A., Fritzc,C., Holzer,E., Moestl,D.,\n", + " Hilbert,H., Borzym,K., Langer,I., Beck,A., Lehrach,H., Reinhardt,R.,\n", + " Pohl,T.M., Eger,P., Zimmermann,W., Wedler,H., Wambutt,R.,\n", + " Purnelle,B., Goffeau,A., Cadieu,E., Dreano,S., Gloux,S., Lelaure,V.,\n", + " Mottier,S., Galibert,F., Aves,S.J., Xiang,Z., Hunt,C., Moore,K.,\n", + " Hurst,S.M., Lucas,M., Rochet,M., Gaillardin,C., Tallada,V.A.,\n", + " Garzon,A., Thode,G., Daga,R.R., Cruzado,L., Jimenez,J., Sanchez,M.,\n", + " del Rey,F., Benito,J., Dominguez,A., Revuelta,J.L., Moreno,S.,\n", + " Armstrong,J., Forsburg,S.L., Cerutti,L., Lowe,T., McCombie,W.R.,\n", + " Paulsen,I., Potashkin,J., Shpakovski,G.V., Ussery,D., Barrell,B.G.\n", + " and Nurse,P.\n", + " TITLE The genome sequence of Schizosaccharomyces pombe\n", + " JOURNAL Nature 415 (6874), 871-880 (2002)\n", + " PUBMED 11859360\n", + " 
REMARK Erratum:[Nature 2003 Jan 2;421(6918):94. Cerrutti L [corrected to\n", + " Cerutti L]]\n", + "REFERENCE 7 (bases 1 to 4538)\n", + " AUTHORS Schafer,B., Hansen,M. and Lang,B.F.\n", + " TITLE Transcription and RNA-processing in fission yeast mitochondria\n", + " JOURNAL RNA 11 (5), 785-795 (2005)\n", + " PUBMED 15811919\n", + "REFERENCE 8\n", + " AUTHORS Wood,V.\n", + " CONSRTM The Schizosaccharomyces pombe Genome Sequencing Consortium\n", + " TITLE Direct Submission\n", + " JOURNAL Submitted (29-JUN-2007) European Schizosaccharomyces genome\n", + " sequencing project, Sanger Institute, The Wellcome Trust Genome\n", + " Campus, Hinxton, Cambridge CB10 1SA\n", + "REFERENCE 9\n", + " AUTHORS Wood,V. and Rutherford,K.\n", + " CONSRTM PomBase\n", + " TITLE Direct Submission\n", + " JOURNAL Submitted (13-MAR-2024) University of Cambridge, PomBase, Hopkins\n", + " building, Tennis Court Rd, Cambridge, United Kingdom\n", + "COMMENT On or before Jan 26, 2012 this sequence version replaced\n", + " AL672256.4, AL009197.1, AL009227.1, AL021046.4, AL021809.4,\n", + " AL021813.1, AL021817.2, AL031180.3, AL034486.1, AL034565.1,\n", + " AL034583.1, AL035064.1, AL035248.2, AL035254.1, AL035439.1,\n", + " AL096845.1, AL109734.1, AL109738.1, AL109739.1, AL109770.1,\n", + " AL109820.1, AL109831.1, AL109832.1, AL109951.1, AL109988.1,\n", + " AL110469.1, AL110509.2, AL117210.1, AL117212.1, AL117213.1,\n", + " AL117390.1, AL121732.1, AL121741.1, AL121745.2, AL121764.1,\n", + " AL121765.1, AL121770.1, AL122032.1, AL132667.1, AL132675.1,\n", + " AL132714.1, AL132769.1, AL132779.2, AL132798.2, AL132828.1,\n", + " AL132839.1, AL132983.1, AL132984.1, AL133154.2, AL133156.1,\n", + " AL133157.1, AL133225.2, AL133302.1, AL133357.1, AL133359.1,\n", + " AL133360.1, AL133361.1, AL133442.1, AL133498.1, AL133521.1,\n", + " AL133522.1, AL135751.1, AL136078.1, AL136235.1, AL136499.1,\n", + " AL136521.2, AL136538.1, AL137130.1, AL138666.2, AL138854.1,\n", + " AL139315.1, AL157734.1, AL157811.1, 
AL157872.1, AL157917.1,\n", + " AL157993.1, AL157994.1, AL158056.1, AL159180.1, AL159951.1,\n", + " AL162531.1, AL162631.1, AL163031.1, AL163071.1, AL163191.2,\n", + " AL163481.1, AL163529.1, AL353014.1, AL353860.2, AL355012.1,\n", + " AL355013.1, AL355252.1, AL355452.1, AL355632.1, AL355652.1,\n", + " AL355653.1, AL356333.1, AL356335.1, AL357232.1, AL358272.1,\n", + " AL360054.1, AL360094.1, AL390095.1, AL390274.1, AL390814.1,\n", + " AL391713.1, AL391744.1, AL391746.2, AL391783.1, AL441621.1,\n", + " AL441624.1, AL512486.1, AL512487.1, AL512491.1, AL512493.1,\n", + " AL512496.1, AL512549.1, AL512562.1, AL583902.1, AL590562.1,\n", + " AL590582.1, AL590602.1, AL590605.1, AL590902.2, AL590903.1,\n", + " AL691401.1, AL691402.1, AL691405.1, Z49811.1, Z50112.1, Z50113.1,\n", + " Z50142.1, Z50728.2, Z54096.1, Z54142.2, Z54285.2, Z54308.1,\n", + " Z54328.1, Z54354.1, Z54366.1, Z56276.2, Z64354.1, Z66568.2,\n", + " Z67757.1, Z67961.2, Z67998.1, Z67999.1, Z68136.2, Z68144.1,\n", + " Z68166.1, Z68197.2, Z68198.1, Z68887.1, Z69086.1, Z69239.1,\n", + " Z69240.1, Z69368.1, Z69369.1, Z69380.1, Z69725.1, Z69726.1,\n", + " Z69727.1, Z69728.1, Z69729.1, Z69730.1, Z69731.1, Z69795.1,\n", + " Z69796.1, Z69944.1, Z70043.1, Z70690.1, Z70691.1, Z70721.1,\n", + " Z73099.2, Z73100.2, Z81312.1, Z81317.1, Z94864.1, Z95334.1,\n", + " Z95395.1, Z95396.2, Z97185.1, Z97208.1, Z97209.1, Z97210.2,\n", + " Z98056.2, Z98529.1, Z98530.2, Z98531.2, Z98532.1, Z98533.1,\n", + " Z98559.1, Z98560.1, Z98595.1, Z98596.1, Z98597.1, Z98598.1,\n", + " Z98600.1, Z98601.1, Z98602.1, Z98603.1, Z98762.1, Z98763.1,\n", + " Z98849.1, Z98944.1, Z98974.2, Z98975.1, Z98977.4, Z98978.1,\n", + " Z98979.1, Z98980.1, Z98981.3, Z99091.2, Z99126.1, Z99161.1,\n", + " Z99162.1, Z99163.2, Z99164.2, Z99165.1, Z99166.1, Z99167.1,\n", + " Z99168.1, Z99258.1, Z99259.1, Z99260.2, Z99261.1, Z99262.1,\n", + " Z99292.1, Z99295.1, Z99296.2, Z99531.1, Z99532.2, Z99568.2,\n", + " Z99753.1.\n", + "FEATURES Location/Qualifiers\n", + " 
source 1..4538\n", + " /organism=\"Schizosaccharomyces pombe\"\n", + " /mol_type=\"genomic DNA\"\n", + " /strain=\"972h-\"\n", + " /db_xref=\"taxon:4896\"\n", + " /chromosome=\"I\"\n", + " gene <1..676\n", + " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", + " CDS <1..393\n", + " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", + " /codon_start=1\n", + " /product=\"conserved fungal protein\"\n", + " /protein_id=\"CAC21481.2\"\n", + " /translation=\"MMTRMELRPLEIGFSKALTEVAPVTCQCECWDHNLCSSQASEMDL\n", + " IYQSQDTHSCASKQDAVFQLLSETKIPVPNRYRKISHRLSTLSNKKTLKSQLDRFLSSS\n", + " KKLHNDDVNRGDYCFLLSTPVECSASTNSHSYDCLWNFSCNSFPEYSSYSASETSSVAS\n", + " YSYYSGPNPATPSSSSCNLVNANSLDIYLNINNLKKSKSVPRLRGQFMEPVEHNHPLSK\n", + " SLEEQSSFLEQSKDASSNLTACNRSGSSLSSNFYSSRLSKKTSLASLNKSRASLQHKIM\n", + " SLSRNIIRRVFHKPEVHLDPSASILNLSSSHGESNLTNGLLCQNFKLFQDDWLMEDCAP\n", + " DANFTLYTPLQPWEKRSVKPEIRRPRLNPNFFRVFVLEAQMRRAGKLSANTAGRAQLIY\n", + " LPKPAVTFSTSPLHVEL\"\n", + " gene complement(<1..1972)\n", + " /locus_tag=\"SPOM_SPNCRNA.2846\"\n", + " ncRNA complement(<1..1972)\n", + " /ncRNA_class=\"lncRNA\"\n", + " /locus_tag=\"SPOM_SPNCRNA.2846\"\n", + " /product=\"non-coding RNA\"\n", + " 3'UTR 394..676\n", + " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", + " gene 1001..3538\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " 5'UTR 1001..1173\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " CDS join(1174..1597,1645..3416)\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " /codon_start=1\n", + " /product=\"antiparallel microtubule cross-linking factor\n", + " Ase1\"\n", + " /protein_id=\"CAC21482.1\"\n", + " /translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", + " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", + " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", + " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", + " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", + " 
NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", + " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", + " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", + " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", + " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", + " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", + " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", + " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", + " 3'UTR 3417..3538\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " gene complement(3510..>4538)\n", + " /gene=\"ypt71\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", + " 3'UTR complement(3510..3690)\n", + " /gene=\"ypt71\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", + " CDS complement(join(3691..4137,4192..>4290))\n", + " /gene=\"ypt71\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", + " /codon_start=1\n", + " /product=\"GTPase Ypt71\"\n", + " /protein_id=\"CAC21483.1\"\n", + " /translation=\"MSAQKRVFLKVVILGDSGVGKTCLMNQFVNQKFSREYKATIGADF\n", + " LTKDVVVDDKLVTLQLWDTAGQERFQSLGMAFYRGADCCVIVYNVNNSKSFDSVENWRQ\n", + " EFLYQTSQDECAFPFIIVGNQIDKDASKRAVSLHRALDYCKSKHGSNMIHFEASAKENT\n", + " NVTDLFETVSRLALENESSRDDFVNDFSEPLLLSKPLNNTSSCNC\"\n", + " gene 4049..>4538\n", + " /locus_tag=\"SPOM_SPNCRNA.2847\"\n", + " ncRNA 4049..>4538\n", + " /ncRNA_class=\"lncRNA\"\n", + " /locus_tag=\"SPOM_SPNCRNA.2847\"\n", + " /product=\"non-coding RNA\"\n", + "ORIGIN\n", + " 1 atcatcagac gtgtatttca caagccagaa gtgcatttgg atccaagtgc ctccatttta\n", + " 61 aatctctcat cttcgcatgg cgaaagcaac ctgacaaatg gtttgctttg tcaaaatttc\n", + " 121 aagctttttc aggatgattg gttgatggag gattgtgcgc cagatgccaa tttcactttg\n", + " 181 tacaccccgc ttcaaccctg ggaaaagcga agtgtgaaac ctgaaatcag acgtcctcga\n", + " 241 ttaaatccta attttttccg agtatttgtt ttagaagctc aaatgcgacg agctggaaag\n", + " 301 ctatcagcaa acactgctgg ccgagcccag ttaatttacc tcccaaagcc tgccgttacc\n", + " 361 ttctccacta gccctttgca 
tgttgaattg taaaaattta acgcatgact tatatacatt\n", + " 421 tgcattcttc caagctggtt atatttattt tcattttttt ctcacccaat acttttttat\n", + " 481 ccctactgtc tttatggaca atcgactcac aattgtttct ttttgttgta tatgattttt\n", + " 541 tttttaaagg aaatgggttt cgcgatactg ggttgaatcc caattgcggt taatattaca\n", + " 601 taaaataatt ctcccatagt cctagatcct gtctttgaat atgagcaaat aaaagaattg\n", + " 661 aacaaatcat gaatgctttt ctctcttaga tgatattttg tatgcataag tctaattata\n", + " 721 ttgattacga taagacttaa aaagtaagcc tttgtatcct tttaagcagt atttgaattt\n", + " 781 tcttgtatca tattttaggt agagcaaaag ataccagttt gtagaacttt atgtgcttcc\n", + " 841 ttacattggt atatttcagg cacataaata ttcttcaact tacaattcta agtattttgt\n", + " 901 ttatactaaa aggagctgaa taacgtttat acagtgctga cattgaaatc tatttgcttt\n", + " 961 ctttggaata taagcgcatg ctgagttact ttcgcaggcc aagccatatc caaccaccat\n", + " 1021 ttttgtgcca agcttttatg caaggttaat tccttgtact gcttgttatg ttataatata\n", + " 1081 tcaacatctt aacagttttc atatcttcct ttatattcta ttaattgaat ttcaaacatc\n", + " 1141 gttttattga gctcatttac atcaaccggt tcaatgcaaa cagtaatgat ggatgacatt\n", + " 1201 caaagcactg attctattgc tgaaaaagat aatcactcta ataatgaatc taactttact\n", + " 1261 tggaaagcgt ttcgtgaaca agtggaaaag catttttcta aaattgaaag gcttcaccaa\n", + " 1321 gtccttggaa cagatggaga caattcatca ttatttgagt tgtttacaac ggcaatgaat\n", + " 1381 gcccagcttc atgaaatgga acagtgccag aaaaaacttg aagatgactg tcagcaaaga\n", + " 1441 attgattcaa tcagattttt ggtttcctca ttaaagttaa cggatgatac ttctagtctc\n", + " 1501 aaaattgagt ctcctttaat tcagtgtttg aatcgtttgt caatggtaga aggacaatat\n", + " 1561 atggcacagt atgatcaaaa gttaagtacg attaaaggta tgtaatcgtc tttaatttag\n", + " 1621 acttgtgttt taactgatgt atagaaatgt atcacaaatt ggagtcatat tgtaaccgct\n", + " 1681 taggaagtcc gttcgtttta cctgattttg agaattcatt tttatctgat gtatccgatg\n", + " 1741 cttttactga atctttgaga ggacgcatca acgaagccga aaaggagatt gatgcgagat\n", + " 1801 tagaggttat taattccttt gaagaagaaa ttttgggttt gtggtctgaa ctcggtgttg\n", + " 1861 agcccgctga tgttccacaa tacgaacaat tgcttgaatc ccatactaat 
cgaccaaatg\n", + " 1921 atgtttatgt tactcaagaa cttatcgacc aactttgcaa gcaaaaagaa gttttttccg\n", + " 1981 ctgaaaaaga aaagagaagt gatcatttaa aaagtataca atcagaagtt agcaacttgt\n", + " 2041 ggaataagct tcaagtttct cccaatgaac aaagtcaatt tggcgattca tcaaacatta\n", + " 2101 atcaagaaaa tatttcatta tgggaaactg aacttgaaaa acttcatcag ttaaaaaagg\n", + " 2161 agcatttacc cattttttta gaagactgtc gtcaacaaat tcttcagctt tgggattctc\n", + " 2221 tgttttattc agaagaacaa agaaagtcct ttacacctat gtatgaagac attattacag\n", + " 2281 agcaggttct tacggcccat gaaaactata taaagcaact agaggccgaa gtttctgcta\n", + " 2341 ataagtcctt tttaagctta attaatcgct atgcctcttt aatagaagga aagaaagagc\n", + " 2401 ttgaagctag ttctaatgat gcctctcgtc taacacaacg gggacgccgg gacccaggtt\n", + " 2461 tacttctacg tgaagagaaa atccgtaagc gactttctag agaacttcct aaggttcagt\n", + " 2521 cgctgcttat accagagatt acagcatggg aagaaagaaa tggaaggacg ttcctttttt\n", + " 2581 atgatgaacc acttctcaag atttgccaag aggccactca accaaaatca ttatatagaa\n", + " 2641 gtgcaagtgc tgccgcaaac cgcccgaaaa cagcaactac aacggactct gttaatagaa\n", + " 2701 caccttctca acgagggcgt gtagctgtac cttcaacacc aagtgttagg tccgcttctc\n", + " 2761 gagctatgac gagtccaagg acaccgcttc ctagagtaaa aaacactcaa aatccaagtc\n", + " 2821 gttccattag tgcagaaccg ccatcagcaa ccagtaccgc caatagaaga caccccactg\n", + " 2881 ctaatcgaat tgatataaac gctagattaa acagtgctag tcggtctcga agcgcgaaca\n", + " 2941 tgataagaca aggggcaaat ggtagtgaca gcaatatgtc ttcttcaccc gtttctggaa\n", + " 3001 attccaatac cccttttaac aagtttccaa attctgtatc tcgcaataca cattttgaat\n", + " 3061 ccaagtcacc gcacccaaat tactctcgaa ctcctcatga aacgtattca aaggcttcat\n", + " 3121 ctaagaacgt cccattaagt cctccaaagc agcgtgtagt taatgaacac gctttaaata\n", + " 3181 ttatgtcgga aaaattgcaa agaactaatc tgaaagaaca aacacccgag atggacattg\n", + " 3241 aaaacagctc gcagaacctt cctttttctc ctatgaagat atcccccata agagcatcac\n", + " 3301 ccgtaaagac aattccatca tcaccgtccc ccactaccaa cattttttct gctccactca\n", + " 3361 acaatattac aaattgtaca ccgatggagg atgaatgggg agaagaaggc ttttaagctt\n", + " 3421 
cttatttacc taatcgatca aatttaaata tacatatttt tgcatatgaa tacagcatat\n", + " 3481 agataattca taaaagttta ttaactgagg tcataattaa aagactattt acacctaaaa\n", + " 3541 aaaaacgtgt atcaatagag ggaaaagaga agaattaaga acagaaagta accatagttt\n", + " 3601 tgttaaaata gcaatgtaaa aaaatattat gaaaagaaaa cgtatagcac attttgaaat\n", + " 3661 gtaaaagaat ctgagagagc gtgtgaatat ctagcaatta caagaagatg tattattcaa\n", + " 3721 aggctttgaa agaagcaaag gttcagagaa gtcattaaca aagtcatctc tcgagctttc\n", + " 3781 attttctaaa gctaaacgac tgactgtttc gaaaaggtca gtaacgtttg tattttcttt\n", + " 3841 tgcactagct tcaaaatgaa tcatatttga tccatgtttg gatttgcaat agtcaagagc\n", + " 3901 tcgatgaaga gatacggctc gtttagacgc gtctttgtcg atttgatttc caacgataat\n", + " 3961 gaaagggaat gcacattcat cttgtgaagt ttgatataaa aattcttgcc tccagttttc\n", + " 4021 tactgagtca aaagacttcg agttattcac attataaaca attacacaac aatcggcccc\n", + " 4081 tctgtaaaaa gccattccca ggctttgaaa tcgttcttga ccagcagtat cccaaagctg\n", + " 4141 tttataatta gcaaacgaat ttagatgggc ggaacttata ttggaactta cctgtaatgt\n", + " 4201 gaccaatttg tcgtcaacca caacgtcctt ggttaaaaaa tcagcaccga tggtagcttt\n", + " 4261 atattcgcga ctaaactttt gattgacgaa ctaaaatgac gatgttaaca aattgccaaa\n", + " 4321 gcaatactca tagagaagct gatgtaaaga tcgttaacca tatttgagct agtatttaat\n", + " 4381 aacaaagtga ataaatttta aaagcaatca ccttgtagcg acaaataaca acttatcgac\n", + " 4441 ataaaatcaa tgggaaattg cagtattgga ttttacagct caatacaaaa accaaaaaga\n", + " 4501 aaaatatact gaacgtataa aatttaacgc ttcaattg\n", + "//\n" + ] + } + ], + "source": [ + "# Parsing the files\n", + "pFA6akanMX6_path = \"./pFA6a-kanMX6.gb\"\n", + "ase1_path = \"./CU329670.gb\"\n", + "vector = parse(pFA6akanMX6_path)[0]\n", + "pombe_chromosome_I = parse(ase1_path)[0]\n", + "\n", + "# Printing the parsed files\n", + "\n", + "print(vector.format(\"gb\"))\n", + "print(pombe_chromosome_I.format(\"gb\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"ACCATGTCGACATGCAAACAGTAATGATGGA , Tm: 57.24061148156318\n", + "GGCGCGCCATTAAAAGCCTTCTTCTCCC , Tm: 56.64459495003314\n" + ] + } + ], + "source": [ + "from pydna.design import primer_design\n", + "#Finding the feature containing the CDS with ase1 as a type qualifier\n", + "gene = next(f for f in pombe_chromosome_I.features if f.type == \"CDS\" and\n", + " \"gene\" in f.qualifiers and\n", + " \"ase1\" in f.qualifiers[\"gene\"])\n", + "\n", + "# Using the primer_design function to design primers to amplify the CDS\n", + "# `min` and `max` can be used on a SeqFeature to get the start (leftmost) and end (rightmost) positions\n", + "# this works both on feature with SimpleLocation and CompoundLocation\n", + "amplicon = primer_design(pombe_chromosome_I[min(gene):max(gene)], target_tm=55)\n", + "\n", + "fwd_align, rvs_align = amplicon.primers()\n", + "fwd_primer_ase1 = Dseqrecord(\"ACCATGTCGAC\") + fwd_align # Adding a SalI cut site\n", + "rvs_primer_ase1 = Dseqrecord(\"GGCGCGCCAT\") + rvs_align # Adding a AscI cut site\n", + "\n", + "# Printing out the primers\n", + "\n", + "print(fwd_primer_ase1.seq, ', Tm: ', tm_default(fwd_align))\n", + "print(rvs_primer_ase1.seq, ', Tm: ', tm_default(rvs_align))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS 2263bp_PCR_prod 2263 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_description_description.\n", + "ACCESSION 2263bp\n", + "VERSION 2263bp\n", + "DBLINK BioProject: PRJNA13836\n", + " BioSample: SAMEA3138176\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " CDS join(12..435,483..2254)\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " /codon_start=1\n", + " /product=\"antiparallel microtubule cross-linking factor\n", + " Ase1\"\n", + " /protein_id=\"CAC21482.1\"\n", + " 
/translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", + " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", + " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", + " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", + " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", + " NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", + " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", + " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", + " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", + " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", + " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", + " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", + " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", + " primer_bind 12..31\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer\n", + " sequence:ACCATGTCGACATGCAAACAGTAATGATGGA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(2236..2254)\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer\n", + " sequence:GGCGCGCCATTAAAAGCCTTCTTCTCCC\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 accatgtcga catgcaaaca gtaatgatgg atgacattca aagcactgat tctattgctg\n", + " 61 aaaaagataa tcactctaat aatgaatcta actttacttg gaaagcgttt cgtgaacaag\n", + " 121 tggaaaagca tttttctaaa attgaaaggc ttcaccaagt ccttggaaca gatggagaca\n", + " 181 attcatcatt atttgagttg tttacaacgg caatgaatgc ccagcttcat gaaatggaac\n", + " 241 agtgccagaa aaaacttgaa gatgactgtc agcaaagaat tgattcaatc agatttttgg\n", + " 301 tttcctcatt aaagttaacg gatgatactt ctagtctcaa aattgagtct cctttaattc\n", + " 361 agtgtttgaa tcgtttgtca atggtagaag gacaatatat ggcacagtat gatcaaaagt\n", + " 421 taagtacgat taaaggtatg taatcgtctt taatttagac ttgtgtttta actgatgtat\n", + " 481 agaaatgtat cacaaattgg agtcatattg taaccgctta ggaagtccgt 
tcgttttacc\n", + " 541 tgattttgag aattcatttt tatctgatgt atccgatgct tttactgaat ctttgagagg\n", + " 601 acgcatcaac gaagccgaaa aggagattga tgcgagatta gaggttatta attcctttga\n", + " 661 agaagaaatt ttgggtttgt ggtctgaact cggtgttgag cccgctgatg ttccacaata\n", + " 721 cgaacaattg cttgaatccc atactaatcg accaaatgat gtttatgtta ctcaagaact\n", + " 781 tatcgaccaa ctttgcaagc aaaaagaagt tttttccgct gaaaaagaaa agagaagtga\n", + " 841 tcatttaaaa agtatacaat cagaagttag caacttgtgg aataagcttc aagtttctcc\n", + " 901 caatgaacaa agtcaatttg gcgattcatc aaacattaat caagaaaata tttcattatg\n", + " 961 ggaaactgaa cttgaaaaac ttcatcagtt aaaaaaggag catttaccca tttttttaga\n", + " 1021 agactgtcgt caacaaattc ttcagctttg ggattctctg ttttattcag aagaacaaag\n", + " 1081 aaagtccttt acacctatgt atgaagacat tattacagag caggttctta cggcccatga\n", + " 1141 aaactatata aagcaactag aggccgaagt ttctgctaat aagtcctttt taagcttaat\n", + " 1201 taatcgctat gcctctttaa tagaaggaaa gaaagagctt gaagctagtt ctaatgatgc\n", + " 1261 ctctcgtcta acacaacggg gacgccggga cccaggttta cttctacgtg aagagaaaat\n", + " 1321 ccgtaagcga ctttctagag aacttcctaa ggttcagtcg ctgcttatac cagagattac\n", + " 1381 agcatgggaa gaaagaaatg gaaggacgtt ccttttttat gatgaaccac ttctcaagat\n", + " 1441 ttgccaagag gccactcaac caaaatcatt atatagaagt gcaagtgctg ccgcaaaccg\n", + " 1501 cccgaaaaca gcaactacaa cggactctgt taatagaaca ccttctcaac gagggcgtgt\n", + " 1561 agctgtacct tcaacaccaa gtgttaggtc cgcttctcga gctatgacga gtccaaggac\n", + " 1621 accgcttcct agagtaaaaa acactcaaaa tccaagtcgt tccattagtg cagaaccgcc\n", + " 1681 atcagcaacc agtaccgcca atagaagaca ccccactgct aatcgaattg atataaacgc\n", + " 1741 tagattaaac agtgctagtc ggtctcgaag cgcgaacatg ataagacaag gggcaaatgg\n", + " 1801 tagtgacagc aatatgtctt cttcacccgt ttctggaaat tccaataccc cttttaacaa\n", + " 1861 gtttccaaat tctgtatctc gcaatacaca ttttgaatcc aagtcaccgc acccaaatta\n", + " 1921 ctctcgaact cctcatgaaa cgtattcaaa ggcttcatct aagaacgtcc cattaagtcc\n", + " 1981 tccaaagcag cgtgtagtta atgaacacgc tttaaatatt atgtcggaaa aattgcaaag\n", + " 2041 
aactaatctg aaagaacaaa cacccgagat ggacattgaa aacagctcgc agaaccttcc\n", + " 2101 tttttctcct atgaagatat cccccataag agcatcaccc gtaaagacaa ttccatcatc\n", + " 2161 accgtccccc actaccaaca ttttttctgc tccactcaac aatattacaa attgtacacc\n", + " 2221 gatggaggat gaatggggag aagaaggctt ttaatggcgc gcc\n", + "//\n" + ] + } + ], + "source": [ + "# Performing a PCR to check that the primers are specific. An error message is returned if otherwise.\n", + "\n", + "pcr_product = pcr(fwd_primer_ase1, rvs_primer_ase1, pombe_chromosome_I)\n", + "\n", + "# Printing out the PCR results\n", + "\n", + "print(pcr_product.format(\"gb\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(Dseqrecord(-30), Dseqrecord(-3916))\n", + "(Dseqrecord(-10), Dseqrecord(-2255), Dseqrecord(-6))\n" + ] + } + ], + "source": [ + "# Cleaving the cloning vector with restriction enzymes\n", + "\n", + "plasmid_digests = vector.cut(SalI, AscI)\n", + "\n", + "# Cleaving the gene fragment with restriction enzymes\n", + "\n", + "gene_digests = Dseqrecord(pcr_product).cut(SalI, AscI)\n", + "\n", + "# Printing out the digests\n", + "print(plasmid_digests) \n", + "print(gene_digests)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS name 6163 bp DNA circular UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 52..1408\n", + " /label=\"kanMX\"\n", + " /note=\"yeast selectable marker conferring kanamycin\n", + " resistance (Wach et al., 1994)\"\n", + " promoter 52..395\n", + " /label=\"TEF promoter\"\n", + " /note=\"Ashbya gossypii TEF promoter\"\n", + " CDS 396..1205\n", + " /codon_start=1\n", + " /gene=\"aph(3')-Ia\"\n", + " /product=\"aminoglycoside 
phosphotransferase\"\n", + " /label=\"KanR\"\n", + " /note=\"confers resistance to kanamycin\"\n", + " /translation=\"MGKEKTHVSRPRLNSNMDADLYGYKWARDNVGQSGATIYRLYGKP\n", + " DAPELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTAIPGKTA\n", + " FQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLAQAQSRMNNGLVDASD\n", + " FDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFSLDNLIFDEGKLIGCIDVGRVGIAD\n", + " RYQDLAILWNCLGEFSPSLQKRLFQKYGIDNPDMNKLQFHLMLDEFF\"\n", + " primer_bind complement(463..482)\n", + " /label=\"Kan-R\"\n", + " /note=\"Kanamycin resistance gene, reverse primer\"\n", + " terminator 1211..1408\n", + " /label=\"TEF terminator\"\n", + " /note=\"Ashbya gossypii TEF terminator\"\n", + " primer_bind complement(1512..1531)\n", + " /label=\"T7\"\n", + " /note=\"T7 promoter, forward primer\"\n", + " promoter complement(1513..1531)\n", + " /label=\"T7 promoter\"\n", + " /note=\"promoter for bacteriophage T7 RNA polymerase\"\n", + " primer_bind complement(1618..1635)\n", + " /label=\"L4440\"\n", + " /note=\"L4440 vector, forward primer\"\n", + " rep_origin complement(1789..2377)\n", + " /direction=LEFT\n", + " /label=\"ori\"\n", + " /note=\"high-copy-number ColE1/pMB1/pBR322/pUC origin of\n", + " replication\"\n", + " primer_bind complement(1869..1888)\n", + " /label=\"pBR322ori-F\"\n", + " /note=\"pBR322 origin, forward primer\"\n", + " CDS complement(2548..3408)\n", + " /codon_start=1\n", + " /gene=\"bla\"\n", + " /product=\"beta-lactamase\"\n", + " /label=\"AmpR\"\n", + " /note=\"confers resistance to ampicillin, carbenicillin, and\n", + " related antibiotics\"\n", + " /translation=\"MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI\n", + " ELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS\n", + " PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRW\n", + " EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSA\n", + " LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS\n", + " LIKHW\"\n", + " primer_bind 3171..3190\n", + " /label=\"Amp-R\"\n", + " 
/note=\"Ampicillin resistance gene, reverse primer\"\n", + " promoter complement(3409..3513)\n", + " /gene=\"bla\"\n", + " /label=\"AmpR promoter\"\n", + " primer_bind 3581..3599\n", + " /label=\"pBRforEco\"\n", + " /note=\"pBR322 vectors, upsteam of EcoRI site, forward\n", + " primer\"\n", + " primer_bind complement(3637..3659)\n", + " /label=\"pGEX 3'\"\n", + " /note=\"pGEX vectors, reverse primer\"\n", + " primer_bind 3759..3778\n", + " /label=\"pRS-marker\"\n", + " /note=\"pRS vectors, use to sequence yeast selectable\n", + " marker\"\n", + " promoter 3859..3877\n", + " /label=\"SP6 promoter\"\n", + " /note=\"promoter for bacteriophage SP6 RNA polymerase\"\n", + " primer_bind 3859..3876\n", + " /label=\"SP6\"\n", + " /note=\"SP6 promoter, forward primer\"\n", + " CDS join(3918..4341,4389..6160)\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " /codon_start=1\n", + " /product=\"antiparallel microtubule cross-linking factor\n", + " Ase1\"\n", + " /protein_id=\"CAC21482.1\"\n", + " /translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", + " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", + " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", + " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", + " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", + " NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", + " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", + " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", + " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", + " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", + " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", + " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", + " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", + " primer_bind 3918..3937\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer\n", + " 
sequence:ACCATGTCGACATGCAAACAGTAATGATGGA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(6142..6160)\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer\n", + " sequence:GGCGCGCCATTAAAAGCCTTCTTCTCCC\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 cgcgccagat ctgtttagct tgcctcgtcc ccgccgggtc acccggccag cgacatggag\n", + " 61 gcccagaata ccctccttga cagtcttgac gtgcgcagct caggggcatg atgtgactgt\n", + " 121 cgcccgtaca tttagcccat acatccccat gtataatcat ttgcatccat acattttgat\n", + " 181 ggccgcacgg cgcgaagcaa aaattacggc tcctcgctgc agacctgcga gcagggaaac\n", + " 241 gctcccctca cagacgcgtt gaattgtccc cacgccgcgc ccctgtagag aaatataaaa\n", + " 301 ggttaggatt tgccactgag gttcttcttt catatacttc cttttaaaat cttgctagga\n", + " 361 tacagttctc acatcacatc cgaacataaa caaccatggg taaggaaaag actcacgttt\n", + " 421 cgaggccgcg attaaattcc aacatggatg ctgatttata tgggtataaa tgggctcgcg\n", + " 481 ataatgtcgg gcaatcaggt gcgacaatct atcgattgta tgggaagccc gatgcgccag\n", + " 541 agttgtttct gaaacatggc aaaggtagcg ttgccaatga tgttacagat gagatggtca\n", + " 601 gactaaactg gctgacggaa tttatgcctc ttccgaccat caagcatttt atccgtactc\n", + " 661 ctgatgatgc atggttactc accactgcga tccccggcaa aacagcattc caggtattag\n", + " 721 aagaatatcc tgattcaggt gaaaatattg ttgatgcgct ggcagtgttc ctgcgccggt\n", + " 781 tgcattcgat tcctgtttgt aattgtcctt ttaacagcga tcgcgtattt cgtctcgctc\n", + " 841 aggcgcaatc acgaatgaat aacggtttgg ttgatgcgag tgattttgat gacgagcgta\n", + " 901 atggctggcc tgttgaacaa gtctggaaag aaatgcataa gcttttgcca ttctcaccgg\n", + " 961 attcagtcgt cactcatggt gatttctcac ttgataacct tatttttgac gaggggaaat\n", + " 1021 taataggttg tattgatgtt ggacgagtcg gaatcgcaga ccgataccag gatcttgcca\n", + " 1081 tcctatggaa ctgcctcggt gagttttctc cttcattaca gaaacggctt tttcaaaaat\n", + " 1141 atggtattga taatcctgat atgaataaat tgcagtttca tttgatgctc gatgagtttt\n", + " 1201 tctaatcagt actgacaata aaaagattct tgttttcaag aacttgtcat ttgtatagtt\n", 
+ " 1261 tttttatatt gtagttgttc tattttaatc aaatgttagc gtgatttata ttttttttcg\n", + " 1321 cctcgacatc atctgcccag atgcgaagtt aagtgcgcag aaagtaatat catgcgtcaa\n", + " 1381 tcgtatgtga atgctggtcg ctatactgct gtcgattcga tactaacgcc gccatccagt\n", + " 1441 ttaaacgagc tcgaattcat cgatgatatc agatccacta gtggcctatg cggccgcgga\n", + " 1501 tctgccggtc tccctatagt gagtcgtatt aatttcgata agccaggtta acctgcatta\n", + " 1561 atgaatcggc caacgcgcgg ggagaggcgg tttgcgtatt gggcgctctt ccgcttcctc\n", + " 1621 gctcactgac tcgctgcgct cggtcgttcg gctgcggcga gcggtatcag ctcactcaaa\n", + " 1681 ggcggtaata cggttatcca cagaatcagg ggataacgca ggaaagaaca tgtgagcaaa\n", + " 1741 aggccagcaa aaggccagga accgtaaaaa ggccgcgttg ctggcgtttt tccataggct\n", + " 1801 ccgcccccct gacgagcatc acaaaaatcg acgctcaagt cagaggtggc gaaacccgac\n", + " 1861 aggactataa agataccagg cgtttccccc tggaagctcc ctcgtgcgct ctcctgttcc\n", + " 1921 gaccctgccg cttaccggat acctgtccgc ctttctccct tcgggaagcg tggcgctttc\n", + " 1981 tcatagctca cgctgtaggt atctcagttc ggtgtaggtc gttcgctcca agctgggctg\n", + " 2041 tgtgcacgaa ccccccgttc agcccgaccg ctgcgcctta tccggtaact atcgtcttga\n", + " 2101 gtccaacccg gtaagacacg acttatcgcc actggcagca gccactggta acaggattag\n", + " 2161 cagagcgagg tatgtaggcg gtgctacaga gttcttgaag tggtggccta actacggcta\n", + " 2221 cactagaaga acagtatttg gtatctgcgc tctgctgaag ccagttacct tcggaaaaag\n", + " 2281 agttggtagc tcttgatccg gcaaacaaac caccgctggt agcggtggtt tttttgtttg\n", + " 2341 caagcagcag attacgcgca gaaaaaaagg atctcaagaa gatcctttga tcttttctac\n", + " 2401 ggggtctgac gctcagtgga acgaaaactc acgttaaggg attttggtca tgagattatc\n", + " 2461 aaaaaggatc ttcacctaga tccttttaaa ttaaaaatga agttttaaat caatctaaag\n", + " 2521 tatatatgag taaacttggt ctgacagtta ccaatgctta atcagtgagg cacctatctc\n", + " 2581 agcgatctgt ctatttcgtt catccatagt tgcctgactc cccgtcgtgt agataactac\n", + " 2641 gatacgggag ggcttaccat ctggccccag tgctgcaatg ataccgcgag acccacgctc\n", + " 2701 accggctcca gatttatcag caataaacca gccagccgga agggccgagc gcagaagtgg\n", + " 2761 tcctgcaact 
ttatccgcct ccatccagtc tattaattgt tgccgggaag ctagagtaag\n", + " 2821 tagttcgcca gttaatagtt tgcgcaacgt tgttgccatt gctacaggca tcgtggtgtc\n", + " 2881 acgctcgtcg tttggtatgg cttcattcag ctccggttcc caacgatcaa ggcgagttac\n", + " 2941 atgatccccc atgttgtgca aaaaagcggt tagctccttc ggtcctccga tcgttgtcag\n", + " 3001 aagtaagttg gccgcagtgt tatcactcat ggttatggca gcactgcata attctcttac\n", + " 3061 tgtcatgcca tccgtaagat gcttttctgt gactggtgag tactcaacca agtcattctg\n", + " 3121 agaatagtgt atgcggcgac cgagttgctc ttgcccggcg tcaatacggg ataataccgc\n", + " 3181 gccacatagc agaactttaa aagtgctcat cattggaaaa cgttcttcgg ggcgaaaact\n", + " 3241 ctcaaggatc ttaccgctgt tgagatccag ttcgatgtaa cccactcgtg cacccaactg\n", + " 3301 atcttcagca tcttttactt tcaccagcgt ttctgggtga gcaaaaacag gaaggcaaaa\n", + " 3361 tgccgcaaaa aagggaataa gggcgacacg gaaatgttga atactcatac tcttcctttt\n", + " 3421 tcaatattat tgaagcattt atcagggtta ttgtctcatg agcggataca tatttgaatg\n", + " 3481 tatttagaaa aataaacaaa taggggttcc gcgcacattt ccccgaaaag tgccacctga\n", + " 3541 cgtctaagaa accattatta tcatgacatt aacctataaa aataggcgta tcacgaggcc\n", + " 3601 ctttcgtctc gcgcgtttcg gtgatgacgg tgaaaacctc tgacacatgc agctcccgga\n", + " 3661 gacggtcaca gcttgtctgt aagcggatgc cgggagcaga caagcccgtc agggcgcgtc\n", + " 3721 agcgggtgtt ggcgggtgtc ggggctggct taactatgcg gcatcagagc agattgtact\n", + " 3781 gagagtgcac catatggaca tattgtcgtt agaacgcggc tacaattaat acataacctt\n", + " 3841 atgtatcata cacatacgat ttaggtgaca ctatagaacg cggccgccag ctgaagcttc\n", + " 3901 gtacgctgca ggtcgacatg caaacagtaa tgatggatga cattcaaagc actgattcta\n", + " 3961 ttgctgaaaa agataatcac tctaataatg aatctaactt tacttggaaa gcgtttcgtg\n", + " 4021 aacaagtgga aaagcatttt tctaaaattg aaaggcttca ccaagtcctt ggaacagatg\n", + " 4081 gagacaattc atcattattt gagttgttta caacggcaat gaatgcccag cttcatgaaa\n", + " 4141 tggaacagtg ccagaaaaaa cttgaagatg actgtcagca aagaattgat tcaatcagat\n", + " 4201 ttttggtttc ctcattaaag ttaacggatg atacttctag tctcaaaatt gagtctcctt\n", + " 4261 taattcagtg tttgaatcgt ttgtcaatgg 
tagaaggaca atatatggca cagtatgatc\n", + " 4321 aaaagttaag tacgattaaa ggtatgtaat cgtctttaat ttagacttgt gttttaactg\n", + " 4381 atgtatagaa atgtatcaca aattggagtc atattgtaac cgcttaggaa gtccgttcgt\n", + " 4441 tttacctgat tttgagaatt catttttatc tgatgtatcc gatgctttta ctgaatcttt\n", + " 4501 gagaggacgc atcaacgaag ccgaaaagga gattgatgcg agattagagg ttattaattc\n", + " 4561 ctttgaagaa gaaattttgg gtttgtggtc tgaactcggt gttgagcccg ctgatgttcc\n", + " 4621 acaatacgaa caattgcttg aatcccatac taatcgacca aatgatgttt atgttactca\n", + " 4681 agaacttatc gaccaacttt gcaagcaaaa agaagttttt tccgctgaaa aagaaaagag\n", + " 4741 aagtgatcat ttaaaaagta tacaatcaga agttagcaac ttgtggaata agcttcaagt\n", + " 4801 ttctcccaat gaacaaagtc aatttggcga ttcatcaaac attaatcaag aaaatatttc\n", + " 4861 attatgggaa actgaacttg aaaaacttca tcagttaaaa aaggagcatt tacccatttt\n", + " 4921 tttagaagac tgtcgtcaac aaattcttca gctttgggat tctctgtttt attcagaaga\n", + " 4981 acaaagaaag tcctttacac ctatgtatga agacattatt acagagcagg ttcttacggc\n", + " 5041 ccatgaaaac tatataaagc aactagaggc cgaagtttct gctaataagt cctttttaag\n", + " 5101 cttaattaat cgctatgcct ctttaataga aggaaagaaa gagcttgaag ctagttctaa\n", + " 5161 tgatgcctct cgtctaacac aacggggacg ccgggaccca ggtttacttc tacgtgaaga\n", + " 5221 gaaaatccgt aagcgacttt ctagagaact tcctaaggtt cagtcgctgc ttataccaga\n", + " 5281 gattacagca tgggaagaaa gaaatggaag gacgttcctt ttttatgatg aaccacttct\n", + " 5341 caagatttgc caagaggcca ctcaaccaaa atcattatat agaagtgcaa gtgctgccgc\n", + " 5401 aaaccgcccg aaaacagcaa ctacaacgga ctctgttaat agaacacctt ctcaacgagg\n", + " 5461 gcgtgtagct gtaccttcaa caccaagtgt taggtccgct tctcgagcta tgacgagtcc\n", + " 5521 aaggacaccg cttcctagag taaaaaacac tcaaaatcca agtcgttcca ttagtgcaga\n", + " 5581 accgccatca gcaaccagta ccgccaatag aagacacccc actgctaatc gaattgatat\n", + " 5641 aaacgctaga ttaaacagtg ctagtcggtc tcgaagcgcg aacatgataa gacaaggggc\n", + " 5701 aaatggtagt gacagcaata tgtcttcttc acccgtttct ggaaattcca ataccccttt\n", + " 5761 taacaagttt ccaaattctg tatctcgcaa tacacatttt gaatccaagt 
caccgcaccc\n", + " 5821 aaattactct cgaactcctc atgaaacgta ttcaaaggct tcatctaaga acgtcccatt\n", + " 5881 aagtcctcca aagcagcgtg tagttaatga acacgcttta aatattatgt cggaaaaatt\n", + " 5941 gcaaagaact aatctgaaag aacaaacacc cgagatggac attgaaaaca gctcgcagaa\n", + " 6001 ccttcctttt tctcctatga agatatcccc cataagagca tcacccgtaa agacaattcc\n", + " 6061 atcatcaccg tcccccacta ccaacatttt ttctgctcca ctcaacaata ttacaaattg\n", + " 6121 tacaccgatg gaggatgaat ggggagaaga aggcttttaa tgg\n", + "//\n" + ] + } + ], + "source": [ + "# Ligating, then circularising the synthetic plasmid\n", + "\n", + "synthetic_vector = plasmid_digests[1] + gene_digests [1]\n", + "synthetic_vector = synthetic_vector.looped()\n", + "\n", + "# Printing out the completed cloning vector\n", + "\n", + "print(synthetic_vector.format(\"gb\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/Gibson.ipynb b/docs/notebooks/Gibson.ipynb index 2fb3dad2..5bba334b 100644 --- a/docs/notebooks/Gibson.ipynb +++ b/docs/notebooks/Gibson.ipynb @@ -1,167 +1,167 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Gibson Assembly in pydna\n", - "> Visit the full library documentation [here](https://bjornfjohansson.github.io/pydna/)\n", - "\n", - "Gibson Assembly is a powerful method to assemble multiple DNA fragments into a single, continuous sequence in a seamless, one-step reaction. Developed by Daniel Gibson and colleagues in 2009, this method has been widely applied to work in molecular cloning, biotechnology, and synthetic biology. 
\n", - "\n", - "`pydna` provides the `Assembly` class to simulate the assembly of DNA sequences. Below is an example fpr performing Gibson Assembly with pre-existing DNA fragments, followed by primer design for generating these fragments via the `pcr` method, if needed.\n", - "\n", - "The `Assembly` takes the following arguments:\n", - " * `frags`: list of DNA fragments as `Dseqrecord` objects\n", - " * `limit`: the minimum sequence homology required.\n", - " * `algorithm`: the function used to find homology regions between DNA fragments. For Gibson Assembly, we use the `terminal_overlap` function, which finds homology regions only at the terminal regions. By default, the `Assembly` class uses the `common_sub_strings` function to find homology regions, which finds homology anywhere, as it could happen in a homologous recombination event.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Assembly\n", - "fragments..: 33bp 34bp 35bp\n", - "limit(bp)..: 14\n", - "G.nodes....: 6\n", - "algorithm..: terminal_overlap\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.assembly import Assembly\n", - "from pydna.common_sub_strings import terminal_overlap\n", - "\n", - "#Creating example Dseqrecord sequences\n", - "fragment1 = 
Dseqrecord(\"acgatgctatactgCCCCCtgtgctgtgctcta\")\n", - "fragment2 = Dseqrecord(\"tgtgctgtgctctaTTTTTtattctggctgtatc\")\n", - "fragment3 = Dseqrecord(\"tattctggctgtatcGGGGGtacgatgctatactg\")\n", - "\n", - "#Creating a list of sequences to assemble\n", - "fragments = [fragment1, fragment2, fragment3]\n", - "\n", - "#Performing Gibson assembly, with a minimum shared homology of 14bp\n", - "assembly = Assembly(fragments, limit=14, algorithm=terminal_overlap)\n", - "\n", - "#Displaying the assembled product\n", - "print(assembly)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The printed output shows the length of each fragment provided to the assembly, the minimum length required for sequence homology search, the number of nodes (number of overlapping regions), and the algorithm used for sequence homology search. Please refer to the full `Assembly` module documentation for more information on the algorithm applied.\n", - "\n", - "To make a circular sequence from an `Assembly`, pydna provides the `assemble_circular` method. The assembled sequence can be printed as normal, as `Dseqrecord` objects. Note that the `assemble_circular` method returns a list, where the two elements are reverse complement of each other." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: True\n", - "size: 59\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(o59)\n", - "acga..GGGt\n", - "tgct..CCCa\n", - "\n", - "Dseqrecord\n", - "circular: True\n", - "size: 59\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(o59)\n", - "taga..AAAA\n", - "atct..TTTT\n" - ] - } - ], - "source": [ - "from pydna.contig import Contig\n", - "\n", - "#Circularizing the assembled sequence\n", - "assembly_circ = assembly.assemble_circular()\n", - "\n", - "#Printing the sequence records\n", - "print(assembly_circ[0])\n", - "print()\n", - "print(assembly_circ[1])\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Please refer to the Example_Gibson page for an example of a completed workflow for modelling Gibson Assembly using pydna. " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Gibson Assembly in pydna\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "Gibson Assembly is a powerful method to assemble multiple DNA fragments into a single, continuous sequence in a seamless, one-step reaction. 
Developed by Daniel Gibson and colleagues in 2009, this method has been widely applied to work in molecular cloning, biotechnology, and synthetic biology. \n", + "\n", + "`pydna` provides the `Assembly` class to simulate the assembly of DNA sequences. Below is an example for performing Gibson Assembly with pre-existing DNA fragments, followed by primer design for generating these fragments via the `pcr` method, if needed.\n", + "\n", + "The `Assembly` takes the following arguments:\n", + " * `frags`: list of DNA fragments as `Dseqrecord` objects\n", + " * `limit`: the minimum sequence homology required.\n", + " * `algorithm`: the function used to find homology regions between DNA fragments. For Gibson Assembly, we use the `terminal_overlap` function, which finds homology regions only at the terminal regions. By default, the `Assembly` class uses the `common_sub_strings` function to find homology regions, which finds homology anywhere, as it could happen in a homologous recombination event.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Assembly\n", + "fragments..: 33bp 34bp 35bp\n", + "limit(bp)..: 14\n", + "G.nodes....: 6\n", + "algorithm..: terminal_overlap\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.assembly 
import Assembly\n", + "from pydna.common_sub_strings import terminal_overlap\n", + "\n", + "#Creating example Dseqrecord sequences\n", + "fragment1 = Dseqrecord(\"acgatgctatactgCCCCCtgtgctgtgctcta\")\n", + "fragment2 = Dseqrecord(\"tgtgctgtgctctaTTTTTtattctggctgtatc\")\n", + "fragment3 = Dseqrecord(\"tattctggctgtatcGGGGGtacgatgctatactg\")\n", + "\n", + "#Creating a list of sequences to assemble\n", + "fragments = [fragment1, fragment2, fragment3]\n", + "\n", + "#Performing Gibson assembly, with a minimum shared homology of 14bp\n", + "assembly = Assembly(fragments, limit=14, algorithm=terminal_overlap)\n", + "\n", + "#Displaying the assembled product\n", + "print(assembly)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The printed output shows the length of each fragment provided to the assembly, the minimum length required for sequence homology search, the number of nodes (number of overlapping regions), and the algorithm used for sequence homology search. Please refer to the full `Assembly` module documentation for more information on the algorithm applied.\n", + "\n", + "To make a circular sequence from an `Assembly`, pydna provides the `assemble_circular` method. The assembled sequence can be printed as normal, as `Dseqrecord` objects. Note that the `assemble_circular` method returns a list, where the two elements are reverse complement of each other." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: True\n", + "size: 59\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(o59)\n", + "acga..GGGt\n", + "tgct..CCCa\n", + "\n", + "Dseqrecord\n", + "circular: True\n", + "size: 59\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(o59)\n", + "taga..AAAA\n", + "atct..TTTT\n" + ] + } + ], + "source": [ + "from pydna.contig import Contig\n", + "\n", + "#Circularizing the assembled sequence\n", + "assembly_circ = assembly.assemble_circular()\n", + "\n", + "#Printing the sequence records\n", + "print(assembly_circ[0])\n", + "print()\n", + "print(assembly_circ[1])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please refer to the Example_Gibson page for an example of a completed workflow for modelling Gibson Assembly using pydna. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/Importing_Seqs.ipynb b/docs/notebooks/Importing_Seqs.ipynb index fa7cd1a9..b606c5ab 100755 --- a/docs/notebooks/Importing_Seqs.ipynb +++ b/docs/notebooks/Importing_Seqs.ipynb @@ -1,408 +1,408 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Importing and viewing sequence files in pydna\n", - "> Visit the full library documentation [here](https://bjornfjohansson.github.io/pydna/)\n", - "\n", - "pydna can be used to work with FASTA, Genbank, EMBL, and snapgene files (.fasta, .gb, .embl, .dna). You can read these files into a `Dseqrecord` that one can view and work with. You can also instantiate `Dseqrecord` objects with strings.\n", - "\n", - "## Importing Sequence Files\n", - "\n", - "To import files into pydna is simple. pydna provides the `parse` method to read all DNA sequences in a file into a list. As an input, `parse` can take:\n", - "\n", - "* The path to a file from your computer\n", - "* A python string with the file content.\n", - "\n", - "The following code shows an example of how to use the `parse` function to import a FASTA file." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">lcl|U49845.1_cds_AAA98665.1_1 [protein=TCP1-beta] [frame=3] [protein_id=AAA98665.1] [location=<1..206] [gbkey=CDS]\n", - "TCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCC\n", - "GACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCT\n", - "GCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGA\n", - "ACCGCCAATAGACAACATATGTAA\n" - ] - } - ], - "source": [ - "from pydna.parsers import parse\n", - "\n", - "#Import your file into python using its path\n", - "file_path = \"./U49845.fasta\"\n", - "files = parse(file_path)\n", - "\n", - "#Show your FASTA file in python\n", - "print(files[0].format(\"fasta\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that `parse` returns a `list` object, hence requiring `[0]` to take the first element of the list. When you have a FASTA file that contains multiple sequences, you can index the list accordingly (e.g `[0]`, `[1]`, ...)\n", - "\n", - "The last line of code uses the `format` method to generate a string representation of the sequence as a FASTA file." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Another example, using a GenBank file ([U49845](https://www.ncbi.nlm.nih.gov/nucleotide/U49845)), is shown below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LOCUS SCU49845 5028 bp DNA linear PLN 29-OCT-2018\n", - "DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p\n", - " (AXL2) and Rev7p (REV7) genes, complete cds.\n", - "ACCESSION U49845\n", - "VERSION U49845.1\n", - "KEYWORDS .\n", - "SOURCE Saccharomyces cerevisiae (brewer's yeast)\n", - " ORGANISM Saccharomyces cerevisiae\n", - " Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina;\n", - " Saccharomycetes; Saccharomycetales; Saccharomycetaceae;\n", - " Saccharomyces.\n", - "REFERENCE 1 (bases 1 to 5028)\n", - " AUTHORS Roemer,T., Madden,K., Chang,J. and Snyder,M.\n", - " TITLE Selection of axial growth sites in yeast requires Axl2p, a novel\n", - " plasma membrane glycoprotein\n", - " JOURNAL Genes Dev. 
10 (7), 777-793 (1996)\n", - " PUBMED 8846915\n", - "REFERENCE 2 (bases 1 to 5028)\n", - " AUTHORS Roemer,T.\n", - " TITLE Direct Submission\n", - " JOURNAL Submitted (22-FEB-1996) Biology, Yale University, New Haven, CT\n", - " 06520, USA\n", - "FEATURES Location/Qualifiers\n", - " source 1..5028\n", - " /organism=\"Saccharomyces cerevisiae\"\n", - " /mol_type=\"genomic DNA\"\n", - " /db_xref=\"taxon:4932\"\n", - " /chromosome=\"IX\"\n", - " mRNA <1..>206\n", - " /product=\"TCP1-beta\"\n", - " CDS <1..206\n", - " /codon_start=3\n", - " /product=\"TCP1-beta\"\n", - " /protein_id=\"AAA98665.1\"\n", - " /translation=\"SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAA\n", - " EVLLRVDNIIRARPRTANRQHM\"\n", - " gene <687..>3158\n", - " /gene=\"AXL2\"\n", - " mRNA <687..>3158\n", - " /gene=\"AXL2\"\n", - " /product=\"Axl2p\"\n", - " CDS 687..3158\n", - " /gene=\"AXL2\"\n", - " /note=\"plasma membrane glycoprotein\"\n", - " /codon_start=1\n", - " /product=\"Axl2p\"\n", - " /protein_id=\"AAA98666.1\"\n", - " /translation=\"MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFT\n", - " FQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVI\n", - " LEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFN\n", - " VTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYS\n", - " FVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDD\n", - " PISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFN\n", - " FEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKF\n", - " QSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSS\n", - " HHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGV\n", - " ILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSY\n", - " DDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKP\n", - " PVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKL\n", - " FDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQ\n", - " SGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQV\n", - " KDIHGRIPEML\"\n", - " gene 
complement(<3300..>4037)\n", - " /gene=\"REV7\"\n", - " mRNA complement(<3300..>4037)\n", - " /gene=\"REV7\"\n", - " /product=\"Rev7p\"\n", - " CDS complement(3300..4037)\n", - " /gene=\"REV7\"\n", - " /codon_start=1\n", - " /product=\"Rev7p\"\n", - " /protein_id=\"AAA98667.1\"\n", - " /translation=\"MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQF\n", - " VPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKD\n", - " DQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVD\n", - " SLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISG\n", - " DDKILNGVYSQYEEGESIFGSLF\"\n", - "ORIGIN\n", - " 1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg\n", - " 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct\n", - " 121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa\n", - " 181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg\n", - " 241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa\n", - " 301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa\n", - " 361 attttggcaa cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat\n", - " 421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga\n", - " 481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc\n", - " 541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga\n", - " 601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta\n", - " 661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag\n", - " 721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa\n", - " 781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata\n", - " 841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga\n", - " 901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac\n", - " 961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg\n", - " 1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca 
tccatctcgc\n", - " 1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa\n", - " 1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca\n", - " 1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac\n", - " 1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa\n", - " 1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag\n", - " 1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct\n", - " 1441 ctattcaaaa tagtttgata atcaacgtta ctgacacagg taacgtttca tatgacttac\n", - " 1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa\n", - " 1561 acttattgga tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc\n", - " 1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata\n", - " 1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca\n", - " 1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc\n", - " 1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc\n", - " 1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac attagctgga gaagtgccca\n", - " 1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc\n", - " 1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg\n", - " 2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt\n", - " 2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc\n", - " 2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg\n", - " 2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca\n", - " 2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata\n", - " 2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg\n", - " 2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga\n", - " 2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt\n", - " 2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat\n", - " 2581 
tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt\n", - " 2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc\n", - " 2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag\n", - " 2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta\n", - " 2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa\n", - " 2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact\n", - " 2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt\n", - " 3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa\n", - " 3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag\n", - " 3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct\n", - " 3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt\n", - " 3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact\n", - " 3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa\n", - " 3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg\n", - " 3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt\n", - " 3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc\n", - " 3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca\n", - " 3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc\n", - " 3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc\n", - " 3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat\n", - " 3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa\n", - " 3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga\n", - " 3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat\n", - " 3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc\n", - " 4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc\n", - " 4081 tcagtcgtcg caaaaacgta 
taccttcttt ttccgacctt ttttttagct ttctggaaaa\n", - " 4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg\n", - " 4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc\n", - " 4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt\n", - " 4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg\n", - " 4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg\n", - " 4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt\n", - " 4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt\n", - " 4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat\n", - " 4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc\n", - " 4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct\n", - " 4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta\n", - " 4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg actctctaac\n", - " 4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct\n", - " 4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct\n", - " 4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc\n", - "//" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.parsers import parse\n", - "\n", - "file_path = \"./U49845.gb\"\n", - "files = parse(file_path)\n", - "\n", - "# Convert the Dseqrecord object into a formatted string in GenBank format\n", - "files[0].format(\"gb\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, you can work with the sequence record using pydna, using the `Dseqrecord` class. `Dseqrecord` provides ways to highlight regions of interest on the sequence, adding new features to the record, removing features, and creating new `Dseqrecord` objects to store and export your changes. 
Please refer to the `Dseq_Features` notebook for more information." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Importing Sequences from Strings\n", - "\n", - "`parse` also allows sequences to be read from a string alone. This could be useful to read FASTA sequences obtained from GenBank APIs. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">lcl|U49845.1_cds_AAA98667.1_3 [gene=REV7] [protein=Rev7p] [protein_id=AAA98667.1] [location=complement(3300..4037)] [gbkey=CDS]\n", - "ATGAATAGATGGGTAGAGAAGTGGCTGAGGGTATACTTAAAATGCTACATTAATTTGATT\n", - "TTATTTTATAGAAATGTATACCCACCTCAGTCATTCGACTACACTACTTACCAGTCATTC\n", - "AACTTGCCGCAGTTCGTTCCCATTAATAGGCATCCTGCTTTAATTGACTATATAGAAGAA\n", - "CTTATACTGGATGTTCTTTCTAAATTAACGCACGTTTACAGATTTTCCATCTGCATTATT\n", - "AATAAAAAGAACGATTTATGCATTGAAAAATACGTTTTAGATTTTAGTGAATTACAACAT\n", - "GTGGATAAAGACGATCAGATCATTACGGAAACTGAAGTGTTCGACGAATTCCGATCTTCC\n", - "TTAAATAGTTTGATTATGCATTTGGAGAAATTACCTAAAGTCAACGATGACACAATAACA\n", - "TTTGAAGCAGTTATTAATGCGATCGAATTGGAACTAGGACATAAGTTGGACAGAAACAGG\n", - "AGGGTCGATAGTTTGGAGGAAAAAGCAGAAATTGAAAGGGATTCAAACTGGGTTAAATGT\n", - "CAAGAAGATGAAAATTTACCAGACAATAATGGTTTTCAACCTCCTAAAATAAAACTCACT\n", - "TCTTTAGTCGGTTCTGACGTGGGGCCTTTGATTATTCATCAGTTTAGTGAAAAATTAATC\n", - "AGCGGTGACGACAAAATTTTGAATGGAGTGTATTCTCAATATGAAGAGGGCGAGAGCATT\n", - "TTTGGATCTTTGTTTTAA\n" - ] - } - ], - "source": [ - "from pydna.parsers import parse\n", - "\n", - "my_record = parse(\n", - "'''\n", - ">lcl|U49845.1_cds_AAA98667.1_3 [gene=REV7] [protein=Rev7p] [protein_id=AAA98667.1] [location=complement(3300..4037)] [gbkey=CDS]\n", - "ATGAATAGATGGGTAGAGAAGTGGCTGAGGGTATACTTAAAATGCTACATTAATTTGATTTTATTTTATA\n", - "GAAATGTATACCCACCTCAGTCATTCGACTACACTACTTACCAGTCATTCAACTTGCCGCAGTTCGTTCC\n", - "CATTAATAGGCATCCTGCTTTAATTGACTATATAGAAGAACTTATACTGGATGTTCTTTCTAAATTAACG\n", - 
"CACGTTTACAGATTTTCCATCTGCATTATTAATAAAAAGAACGATTTATGCATTGAAAAATACGTTTTAG\n", - "ATTTTAGTGAATTACAACATGTGGATAAAGACGATCAGATCATTACGGAAACTGAAGTGTTCGACGAATT\n", - "CCGATCTTCCTTAAATAGTTTGATTATGCATTTGGAGAAATTACCTAAAGTCAACGATGACACAATAACA\n", - "TTTGAAGCAGTTATTAATGCGATCGAATTGGAACTAGGACATAAGTTGGACAGAAACAGGAGGGTCGATA\n", - "GTTTGGAGGAAAAAGCAGAAATTGAAAGGGATTCAAACTGGGTTAAATGTCAAGAAGATGAAAATTTACC\n", - "AGACAATAATGGTTTTCAACCTCCTAAAATAAAACTCACTTCTTTAGTCGGTTCTGACGTGGGGCCTTTG\n", - "ATTATTCATCAGTTTAGTGAAAAATTAATCAGCGGTGACGACAAAATTTTGAATGGAGTGTATTCTCAAT\n", - "ATGAAGAGGGCGAGAGCATTTTTGGATCTTTGTTTTAA\n", - "'''\n", - ")\n", - "print(my_record[0].format(\"fasta\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extra info\n", - "\n", - "Note that pydna's `parse` guesses whether the argument passed is a file path or a string, and also guesses the file type based on the content, so it can give unexpected behaviour if your files are not well formatted. To have more control over the parsing of sequences, you can use biopython's `parse` from `Bio.SeqIO`, and then instantiate a `Dseqrecord` from the biopython's `SeqRecord`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(-5028)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from Bio.SeqIO import parse as seqio_parse\n", - "from pydna.dseqrecord import Dseqrecord\n", - "\n", - "file_path = './U49845.gb'\n", - "\n", - "# Extract the first Seqrecord of the SeqIO.parse iterator\n", - "seq_record = next(seqio_parse(file_path, 'genbank'))\n", - "\n", - "# This is how circularity is stored in biopython's seqrecord\n", - "is_circular = 'topology' in seq_record.annotations.keys() and seq_record.annotations['topology'] == 'circular'\n", - "\n", - "# Convert into Dseqrecord\n", - "dseq_record = Dseqrecord(seq_record, circular=is_circular)\n", - "\n", - 
"dseq_record" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Importing and viewing sequence files in pydna\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "pydna can be used to work with FASTA, Genbank, EMBL, and snapgene files (.fasta, .gb, .embl, .dna). You can read these files into a `Dseqrecord` that one can view and work with. You can also instantiate `Dseqrecord` objects with strings.\n", + "\n", + "## Importing Sequence Files\n", + "\n", + "To import files into pydna is simple. pydna provides the `parse` method to read all DNA sequences in a file into a list. As an input, `parse` can take:\n", + "\n", + "* The path to a file from your computer\n", + "* A python string with the file content.\n", + "\n", + "The following code shows an example of how to use the `parse` function to import a FASTA file." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">lcl|U49845.1_cds_AAA98665.1_1 [protein=TCP1-beta] [frame=3] [protein_id=AAA98665.1] [location=<1..206] [gbkey=CDS]\n", + "TCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCC\n", + "GACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCT\n", + "GCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGA\n", + "ACCGCCAATAGACAACATATGTAA\n" + ] + } + ], + "source": [ + "from pydna.parsers import parse\n", + "\n", + "#Import your file into python using its path\n", + "file_path = \"./U49845.fasta\"\n", + "files = parse(file_path)\n", + "\n", + "#Show your FASTA file in python\n", + "print(files[0].format(\"fasta\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that `parse` returns a `list` object, hence requiring `[0]` to take the first element of the list. When you have a FASTA file that contains multiple sequences, you can index the list accordingly (e.g `[0]`, `[1]`, ...)\n", + "\n", + "The last line of code uses the `format` method to generate a string representation of the sequence as a FASTA file." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another example, using a GenBank file ([U49845](https://www.ncbi.nlm.nih.gov/nucleotide/U49845)), is shown below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LOCUS SCU49845 5028 bp DNA linear PLN 29-OCT-2018\n", + "DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p\n", + " (AXL2) and Rev7p (REV7) genes, complete cds.\n", + "ACCESSION U49845\n", + "VERSION U49845.1\n", + "KEYWORDS .\n", + "SOURCE Saccharomyces cerevisiae (brewer's yeast)\n", + " ORGANISM Saccharomyces cerevisiae\n", + " Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina;\n", + " Saccharomycetes; Saccharomycetales; Saccharomycetaceae;\n", + " Saccharomyces.\n", + "REFERENCE 1 (bases 1 to 5028)\n", + " AUTHORS Roemer,T., Madden,K., Chang,J. and Snyder,M.\n", + " TITLE Selection of axial growth sites in yeast requires Axl2p, a novel\n", + " plasma membrane glycoprotein\n", + " JOURNAL Genes Dev. 
10 (7), 777-793 (1996)\n", + " PUBMED 8846915\n", + "REFERENCE 2 (bases 1 to 5028)\n", + " AUTHORS Roemer,T.\n", + " TITLE Direct Submission\n", + " JOURNAL Submitted (22-FEB-1996) Biology, Yale University, New Haven, CT\n", + " 06520, USA\n", + "FEATURES Location/Qualifiers\n", + " source 1..5028\n", + " /organism=\"Saccharomyces cerevisiae\"\n", + " /mol_type=\"genomic DNA\"\n", + " /db_xref=\"taxon:4932\"\n", + " /chromosome=\"IX\"\n", + " mRNA <1..>206\n", + " /product=\"TCP1-beta\"\n", + " CDS <1..206\n", + " /codon_start=3\n", + " /product=\"TCP1-beta\"\n", + " /protein_id=\"AAA98665.1\"\n", + " /translation=\"SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAA\n", + " EVLLRVDNIIRARPRTANRQHM\"\n", + " gene <687..>3158\n", + " /gene=\"AXL2\"\n", + " mRNA <687..>3158\n", + " /gene=\"AXL2\"\n", + " /product=\"Axl2p\"\n", + " CDS 687..3158\n", + " /gene=\"AXL2\"\n", + " /note=\"plasma membrane glycoprotein\"\n", + " /codon_start=1\n", + " /product=\"Axl2p\"\n", + " /protein_id=\"AAA98666.1\"\n", + " /translation=\"MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFT\n", + " FQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVI\n", + " LEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFN\n", + " VTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYS\n", + " FVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDD\n", + " PISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFN\n", + " FEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKF\n", + " QSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSS\n", + " HHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGV\n", + " ILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSY\n", + " DDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKP\n", + " PVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKL\n", + " FDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQ\n", + " SGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQV\n", + " KDIHGRIPEML\"\n", + " gene 
complement(<3300..>4037)\n", + " /gene=\"REV7\"\n", + " mRNA complement(<3300..>4037)\n", + " /gene=\"REV7\"\n", + " /product=\"Rev7p\"\n", + " CDS complement(3300..4037)\n", + " /gene=\"REV7\"\n", + " /codon_start=1\n", + " /product=\"Rev7p\"\n", + " /protein_id=\"AAA98667.1\"\n", + " /translation=\"MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQF\n", + " VPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKD\n", + " DQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVD\n", + " SLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISG\n", + " DDKILNGVYSQYEEGESIFGSLF\"\n", + "ORIGIN\n", + " 1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg\n", + " 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct\n", + " 121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa\n", + " 181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg\n", + " 241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa\n", + " 301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa\n", + " 361 attttggcaa cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat\n", + " 421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga\n", + " 481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc\n", + " 541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga\n", + " 601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta\n", + " 661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag\n", + " 721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa\n", + " 781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata\n", + " 841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga\n", + " 901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac\n", + " 961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg\n", + " 1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca 
tccatctcgc\n", + " 1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa\n", + " 1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca\n", + " 1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac\n", + " 1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa\n", + " 1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag\n", + " 1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct\n", + " 1441 ctattcaaaa tagtttgata atcaacgtta ctgacacagg taacgtttca tatgacttac\n", + " 1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa\n", + " 1561 acttattgga tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc\n", + " 1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata\n", + " 1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca\n", + " 1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc\n", + " 1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc\n", + " 1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac attagctgga gaagtgccca\n", + " 1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc\n", + " 1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg\n", + " 2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt\n", + " 2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc\n", + " 2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg\n", + " 2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca\n", + " 2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata\n", + " 2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg\n", + " 2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga\n", + " 2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt\n", + " 2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat\n", + " 2581 
tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt\n", + " 2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc\n", + " 2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag\n", + " 2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta\n", + " 2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa\n", + " 2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact\n", + " 2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt\n", + " 3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa\n", + " 3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag\n", + " 3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct\n", + " 3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt\n", + " 3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact\n", + " 3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa\n", + " 3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg\n", + " 3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt\n", + " 3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc\n", + " 3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca\n", + " 3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc\n", + " 3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc\n", + " 3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat\n", + " 3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa\n", + " 3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga\n", + " 3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat\n", + " 3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc\n", + " 4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc\n", + " 4081 tcagtcgtcg caaaaacgta 
taccttcttt ttccgacctt ttttttagct ttctggaaaa\n", + " 4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg\n", + " 4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc\n", + " 4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt\n", + " 4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg\n", + " 4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg\n", + " 4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt\n", + " 4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt\n", + " 4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat\n", + " 4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc\n", + " 4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct\n", + " 4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta\n", + " 4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg actctctaac\n", + " 4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct\n", + " 4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct\n", + " 4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc\n", + "//" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.parsers import parse\n", + "\n", + "file_path = \"./U49845.gb\"\n", + "files = parse(file_path)\n", + "\n", + "# Convert the Dseqrecord object into a formatted string in GenBank format\n", + "files[0].format(\"gb\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, you can work with the sequence record using pydna, using the `Dseqrecord` class. `Dseqrecord` provides ways to highlight regions of interest on the sequence, adding new features to the record, removing features, and creating new `Dseqrecord` objects to store and export your changes. 
Please refer to the `Dseq_Features` notebook for more information." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing Sequences from Strings\n", + "\n", + "`parse` also allows sequences to be read from a string alone. This could be useful to read FASTA sequences obtained from GenBank APIs. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">lcl|U49845.1_cds_AAA98667.1_3 [gene=REV7] [protein=Rev7p] [protein_id=AAA98667.1] [location=complement(3300..4037)] [gbkey=CDS]\n", + "ATGAATAGATGGGTAGAGAAGTGGCTGAGGGTATACTTAAAATGCTACATTAATTTGATT\n", + "TTATTTTATAGAAATGTATACCCACCTCAGTCATTCGACTACACTACTTACCAGTCATTC\n", + "AACTTGCCGCAGTTCGTTCCCATTAATAGGCATCCTGCTTTAATTGACTATATAGAAGAA\n", + "CTTATACTGGATGTTCTTTCTAAATTAACGCACGTTTACAGATTTTCCATCTGCATTATT\n", + "AATAAAAAGAACGATTTATGCATTGAAAAATACGTTTTAGATTTTAGTGAATTACAACAT\n", + "GTGGATAAAGACGATCAGATCATTACGGAAACTGAAGTGTTCGACGAATTCCGATCTTCC\n", + "TTAAATAGTTTGATTATGCATTTGGAGAAATTACCTAAAGTCAACGATGACACAATAACA\n", + "TTTGAAGCAGTTATTAATGCGATCGAATTGGAACTAGGACATAAGTTGGACAGAAACAGG\n", + "AGGGTCGATAGTTTGGAGGAAAAAGCAGAAATTGAAAGGGATTCAAACTGGGTTAAATGT\n", + "CAAGAAGATGAAAATTTACCAGACAATAATGGTTTTCAACCTCCTAAAATAAAACTCACT\n", + "TCTTTAGTCGGTTCTGACGTGGGGCCTTTGATTATTCATCAGTTTAGTGAAAAATTAATC\n", + "AGCGGTGACGACAAAATTTTGAATGGAGTGTATTCTCAATATGAAGAGGGCGAGAGCATT\n", + "TTTGGATCTTTGTTTTAA\n" + ] + } + ], + "source": [ + "from pydna.parsers import parse\n", + "\n", + "my_record = parse(\n", + "'''\n", + ">lcl|U49845.1_cds_AAA98667.1_3 [gene=REV7] [protein=Rev7p] [protein_id=AAA98667.1] [location=complement(3300..4037)] [gbkey=CDS]\n", + "ATGAATAGATGGGTAGAGAAGTGGCTGAGGGTATACTTAAAATGCTACATTAATTTGATTTTATTTTATA\n", + "GAAATGTATACCCACCTCAGTCATTCGACTACACTACTTACCAGTCATTCAACTTGCCGCAGTTCGTTCC\n", + "CATTAATAGGCATCCTGCTTTAATTGACTATATAGAAGAACTTATACTGGATGTTCTTTCTAAATTAACG\n", + 
"CACGTTTACAGATTTTCCATCTGCATTATTAATAAAAAGAACGATTTATGCATTGAAAAATACGTTTTAG\n", + "ATTTTAGTGAATTACAACATGTGGATAAAGACGATCAGATCATTACGGAAACTGAAGTGTTCGACGAATT\n", + "CCGATCTTCCTTAAATAGTTTGATTATGCATTTGGAGAAATTACCTAAAGTCAACGATGACACAATAACA\n", + "TTTGAAGCAGTTATTAATGCGATCGAATTGGAACTAGGACATAAGTTGGACAGAAACAGGAGGGTCGATA\n", + "GTTTGGAGGAAAAAGCAGAAATTGAAAGGGATTCAAACTGGGTTAAATGTCAAGAAGATGAAAATTTACC\n", + "AGACAATAATGGTTTTCAACCTCCTAAAATAAAACTCACTTCTTTAGTCGGTTCTGACGTGGGGCCTTTG\n", + "ATTATTCATCAGTTTAGTGAAAAATTAATCAGCGGTGACGACAAAATTTTGAATGGAGTGTATTCTCAAT\n", + "ATGAAGAGGGCGAGAGCATTTTTGGATCTTTGTTTTAA\n", + "'''\n", + ")\n", + "print(my_record[0].format(\"fasta\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extra info\n", + "\n", + "Note that pydna's `parse` guesses whether the argument passed is a file path or a string, and also guesses the file type based on the content, so it can give unexpected behaviour if your files are not well formatted. To have more control over the parsing of sequences, you can use biopython's `parse` from `Bio.SeqIO`, and then instantiate a `Dseqrecord` from the biopython's `SeqRecord`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(-5028)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from Bio.SeqIO import parse as seqio_parse\n", + "from pydna.dseqrecord import Dseqrecord\n", + "\n", + "file_path = './U49845.gb'\n", + "\n", + "# Extract the first Seqrecord of the SeqIO.parse iterator\n", + "seq_record = next(seqio_parse(file_path, 'genbank'))\n", + "\n", + "# This is how circularity is stored in biopython's seqrecord\n", + "is_circular = 'topology' in seq_record.annotations.keys() and seq_record.annotations['topology'] == 'circular'\n", + "\n", + "# Convert into Dseqrecord\n", + "dseq_record = Dseqrecord(seq_record, circular=is_circular)\n", + "\n", + 
"dseq_record" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/PCR.ipynb b/docs/notebooks/PCR.ipynb index b8db61dc..1ecfe0b4 100755 --- a/docs/notebooks/PCR.ipynb +++ b/docs/notebooks/PCR.ipynb @@ -1,398 +1,398 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# How to Perform a Polymerase Chain Reaction (PCR)\n", - "\n", - "> Visit the full library documentation [here](https://bjornfjohansson.github.io/pydna/)\n", - "\n", - "This page walks you through how to perform a PCR, and design PCR primers, using `pydna`. A PCR amplifies a specific stretch of DNA defined by the primers, and it is critical to ensure primer binding specificity and appropriate primer melting temperature (tm) through careful design. `pydna` provides tools for quick modelling of PCR to check for correct PCR products, and methods for calculating tm and primer design, as performed in other popular web servcies (e.g Primer3Plus). \n", - "\n", - "## Modelling PCR with Known Primers\n", - "\n", - "To perform PCR, `pydna` provides the `anneal` class and the `pcr` method to quickly generate expected primer products, on a `Dseqrecord` object. The `pcr` method needs only the forward and reverse primers, and the sequence. The primers must be passed from the 5' to the 3' end, following biological convention. More information on `Dseqrecord` and importing DNA sequences can be found in the other guide pages. \n", - " \n", - "The following example uses a 300+ bp custom sample circular DNA, containing an example gene that we would like to clone. 
18 bp forward and reverse primers have been provided. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LOCUS 45bp_PCR_prod 45 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_description_description.\n", - "ACCESSION 45bp\n", - "VERSION 45bp\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 1..45\n", - " /label=\"example_gene\"\n", - " primer_bind 1..18\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer sequence:ATTCCTGCAGAGTACATC\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(28..45)\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCTT\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 attcctgcag agtacatcaa ttctatgaag atatcttcgg atggt\n", - "//" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.amplify import pcr\n", - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.parsers import parse\n", - "\n", - "#Importing GenBank file containing sample sequence \n", - "path = \"./sample_seq.gb\"\n", - "record = parse(path)[0]\n", - "\n", - "#Defining forward and reverse primers\n", - "fwd_primer = 
\"ATTCCTGCAGAGTACATC\"\n", - "rvs_primer = \"ACCATCCGAAGATATCTT\"\n", - "\n", - "#Performing PCR\n", - "pcr_product = pcr(fwd_primer, rvs_primer, record)\n", - "\n", - "#Printing results\n", - "pcr_product.format(\"gb\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `pcr` method then returns a `Amplicon` class object (to model a PCR product), a subclass of `Dseqrecord` with some extra methods (e.g `.figure`. See \"Other ways of visualising the PCR products\" section below). All the features inside the PCR product has been retained. Note how the example gene has been retained as a feature in `pcr_product`. In addition, two new features have been added to the record to indicate the forward and reverse primer binding regions.\n", - "\n", - "`pydna` also allows modelling for PCR with extra bases on the 5' end of primers. This functionality is useful for modelling molecular cloning with multiple steps, where you might want to add different restriction sites to PCR products and ensure that the right sequences have been replicated.\n", - "\n", - "For instance, to make sure that I can add a HindIII restriction site (AAGCTT) at the end of my `example_gene` without accidental hybridisation with other parts of the circular sequence, I can perform PCR in the `pydna` package like so." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LOCUS 57bp_PCR_prod 57 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_description_description.\n", - "ACCESSION 57bp\n", - "VERSION 57bp\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " primer_bind 1..21\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer sequence:AAGCTTATTCCTGCAGAGTACATC\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " gene 4..48\n", - " /label=\"example_gene\"\n", - " primer_bind complement(31..48)\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer sequence:AAGCTTACCATCCGAAGATATCTT\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 aagcttattc ctgcagagta catcaattct atgaagatat cttcggatgg taagctt\n", - "//" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fwd_primer = \"AAGCTTATTCCTGCAGAGTACATC\"\n", - "rvs_primer = \"AAGCTTACCATCCGAAGATATCTT\"\n", - "\n", - "#Performing PCR\n", - "pcr_product_HindIII = pcr(fwd_primer, rvs_primer, record)\n", - "\n", - "#Printing results\n", - "pcr_product_HindIII.format(\"gb\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For more information on restriction digests and ligation, please refer to the Restriction and Ligation page. 
\n", - "\n", - "## Other ways of visualising the PCR products \n", - "\n", - "In addition to the normal `print` function and the `.format()` method (More information can be found in Dseq and Importing_Seqs pages, respectively), pcr products can also be visualized in other ways.\n", - "\n", - "We can check the sequence of the pcr products alone using the `.seq` attribute on a PCR product:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ATTCCTGCAGAGTACATCAATTCTATGAAGATATCTTCGGATGGT\n" - ] - } - ], - "source": [ - "print(pcr_product.seq)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also visualize the pcr products as a figure, using the `.figure` method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5ATTCCTGCAGAGTACATC...AAGATATCTTCGGATGGT3\n", - " ||||||||||||||||||\n", - " 3TTCTATAGAAGCCTACCA5\n", - "5ATTCCTGCAGAGTACATC3\n", - " ||||||||||||||||||\n", - "3TAAGGACGTCTCATGTAG...TTCTATAGAAGCCTACCA5\n" - ] - } - ], - "source": [ - "print(pcr_product.figure())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Designing Primers and Calculating Tm in pydna\n", - "\n", - "`pydna` also provides the `primer_design` method to design primer sequences based on the desired pcr product and the template sequence's melting temperature (Tm). The `primer_design` method can be imported from the `pydna.design` module, and needs the user to supply the PCR template sequence (as a `Dseqrecord` object) and the Tm. The template sequence should be given as the first parameter, and the Tm give through the `target_tm=` argument, as demonstrated below. 
If you have no specific Tm in mind, the method uses the default Tm of 55 degrees celcius.\n", - "\n", - "Note that in the following example below, I used zero-based indexing on the `Dseqrecord` to find the sequence of my example gene, of which I would like to clone via PCR. Please refer to the `Dseq` page for more information on how to index a sequence. \n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS example_gene 45 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_f45 example_gene_r45 example_gene.\n", - "ACCESSION example_gene\n", - "VERSION example_gene\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 1..45\n", - " /label=\"example_gene\"\n", - " primer_bind 1..16\n", - " /label=\"f45\"\n", - " /PCR_conditions=\"primer sequence:ATTCCTGCAGAGTACA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(29..45)\n", - " /label=\"r45\"\n", - " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCT\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 attcctgcag agtacatcaa ttctatgaag atatcttcgg atggt\n", - "//\n" - ] - } - ], - "source": [ - "from pydna.design import primer_design\n", - "\n", - "#Designing the primers\n", - "primers = primer_design(record[6:51], target_tm=50.0)\n", - "\n", - "#Printing the output\n", - "print(primers.format(\"gb\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The formula for primer design in `pydna` is based on the Tm formula from Rychlik et al (1990), found [here](http://www.ncbi.nlm.nih.gov/pubmed/2243783). Additional information on calculating Tm can be found in the \"Calculating Tm\" section below.\n", - "\n", - "The forward and reverse primer sequences are printed in the features list of the `Amplicon` object. 
Note how the feature representing the example gene is retained, as appropriate. \n", - "\n", - "If you already have a forward / reverse primer, `primer_design` also allows this information to be taken as arguments. `fp` specifies the forward primer, `rp` specifies the reverse primers. `fp` and `rp` can be should be given as `Primer` class objects, which should be imported from `pydna` too. \n", - "\n", - "For instance, if I already have a forward primer containing an EcoRI restriction site, and I aim to to generate a reverse primer of a similar Tm, I can apply the following code: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS example_gene 51 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_forward_primer example_gene_r45 example_gene.\n", - "ACCESSION example_gene\n", - "VERSION example_gene\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 1..45\n", - " /label=\"example_gene\"\n", - " primer_bind 1..19\n", - " /label=\"f45\"\n", - " /PCR_conditions=\"primer sequence:GAATTCATTCCTGCAGAGTACATCA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(26..45)\n", - " /label=\"r45\"\n", - " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCTTCA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 gaattcattc ctgcagagta catcaattct atgaagatat cttcggatgg t\n", - "//\n" - ] - } - ], - "source": [ - "from pydna.primer import Primer\n", - "\n", - "forward_primer = Primer(\"GAATTCATTCCTGCAGAGTACATCA\", id=\"forward_primer\")\n", - "\n", - "primers_sixfive = primer_design(record[6:51], fp = forward_primer)\n", - "\n", - "print(primers_sixfive.format(\"gb\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Calculating Tm\n", - "\n", - "`pydna` comes with 
some functions to calculate Tms. The default function `tm_default` used is the previously mentioned one by Rychlik et al (1990), which takes a string as input. Another function derive from the Tm calculation adapted for primers using polymerases with a DNA binding domain (e.g Phusion polymerase). The default values for Tm calculation, including primer concentration, buffer strengths, and more, can also be modified through arguments in the `tm_default` method. Please refer to the `pydna.tm` module docstring for more information. An example is provided with a pair of primers; the temperature is given in degrees celcius." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "55.901005046706075\n", - "55.841913263215304\n" - ] - } - ], - "source": [ - "from pydna.tm import tm_default\n", - "\n", - "# Example Tm calculation for a pair of primers\n", - "primer_f = \"ATTCCTGCAGAGTACATCA\"\n", - "primer_r = \"ACCATCCGAAGATATCTTCA\"\n", - "tm_f = tm_default(primer_f)\n", - "tm_r = tm_default(primer_r)\n", - "\n", - "print(tm_f)\n", - "print(tm_r)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to Perform a Polymerase Chain Reaction (PCR)\n", + "\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "This page walks you through how to perform a PCR, and design PCR primers, using `pydna`. 
A PCR amplifies a specific stretch of DNA defined by the primers, and it is critical to ensure primer binding specificity and appropriate primer melting temperature (tm) through careful design. `pydna` provides tools for quick modelling of PCR to check for correct PCR products, and methods for calculating tm and primer design, as performed in other popular web services (e.g. Primer3Plus). \n", + "\n", + "## Modelling PCR with Known Primers\n", + "\n", + "To perform PCR, `pydna` provides the `anneal` class and the `pcr` method to quickly generate expected primer products, on a `Dseqrecord` object. The `pcr` method needs only the forward and reverse primers, and the sequence. The primers must be passed from the 5' to the 3' end, following biological convention. More information on `Dseqrecord` and importing DNA sequences can be found in the other guide pages. \n", + " \n", + "The following example uses a 300+ bp custom sample circular DNA, containing an example gene that we would like to clone. 18 bp forward and reverse primers have been provided. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LOCUS 45bp_PCR_prod 45 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_description_description.\n", + "ACCESSION 45bp\n", + "VERSION 45bp\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 1..45\n", + " /label=\"example_gene\"\n", + " primer_bind 1..18\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer sequence:ATTCCTGCAGAGTACATC\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(28..45)\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCTT\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 attcctgcag agtacatcaa ttctatgaag atatcttcgg atggt\n", + "//" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.amplify import pcr\n", + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.parsers import parse\n", + "\n", + "#Importing GenBank file containing sample sequence \n", + "path = \"./sample_seq.gb\"\n", + "record = parse(path)[0]\n", + "\n", + "#Defining forward and reverse primers\n", + "fwd_primer = \"ATTCCTGCAGAGTACATC\"\n", + "rvs_primer = 
\"ACCATCCGAAGATATCTT\"\n", + "\n", + "#Performing PCR\n", + "pcr_product = pcr(fwd_primer, rvs_primer, record)\n", + "\n", + "#Printing results\n", + "pcr_product.format(\"gb\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `pcr` method then returns a `Amplicon` class object (to model a PCR product), a subclass of `Dseqrecord` with some extra methods (e.g `.figure`. See \"Other ways of visualising the PCR products\" section below). All the features inside the PCR product has been retained. Note how the example gene has been retained as a feature in `pcr_product`. In addition, two new features have been added to the record to indicate the forward and reverse primer binding regions.\n", + "\n", + "`pydna` also allows modelling for PCR with extra bases on the 5' end of primers. This functionality is useful for modelling molecular cloning with multiple steps, where you might want to add different restriction sites to PCR products and ensure that the right sequences have been replicated.\n", + "\n", + "For instance, to make sure that I can add a HindIII restriction site (AAGCTT) at the end of my `example_gene` without accidental hybridisation with other parts of the circular sequence, I can perform PCR in the `pydna` package like so." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LOCUS 57bp_PCR_prod 57 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_description_description.\n", + "ACCESSION 57bp\n", + "VERSION 57bp\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " primer_bind 1..21\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer sequence:AAGCTTATTCCTGCAGAGTACATC\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " gene 4..48\n", + " /label=\"example_gene\"\n", + " primer_bind complement(31..48)\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer sequence:AAGCTTACCATCCGAAGATATCTT\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 aagcttattc ctgcagagta catcaattct atgaagatat cttcggatgg taagctt\n", + "//" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fwd_primer = \"AAGCTTATTCCTGCAGAGTACATC\"\n", + "rvs_primer = \"AAGCTTACCATCCGAAGATATCTT\"\n", + "\n", + "#Performing PCR\n", + "pcr_product_HindIII = pcr(fwd_primer, rvs_primer, record)\n", + "\n", + "#Printing results\n", + "pcr_product_HindIII.format(\"gb\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more information on restriction digests and ligation, please refer to the Restriction and Ligation page. 
\n", + "\n", + "## Other ways of visualising the PCR products \n", + "\n", + "In addition to the normal `print` function and the `.format()` method (More information can be found in Dseq and Importing_Seqs pages, respectively), pcr products can also be visualized in other ways.\n", + "\n", + "We can check the sequence of the pcr products alone using the `.seq` attribute on a PCR product:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ATTCCTGCAGAGTACATCAATTCTATGAAGATATCTTCGGATGGT\n" + ] + } + ], + "source": [ + "print(pcr_product.seq)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also visualize the pcr products as a figure, using the `.figure` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5ATTCCTGCAGAGTACATC...AAGATATCTTCGGATGGT3\n", + " ||||||||||||||||||\n", + " 3TTCTATAGAAGCCTACCA5\n", + "5ATTCCTGCAGAGTACATC3\n", + " ||||||||||||||||||\n", + "3TAAGGACGTCTCATGTAG...TTCTATAGAAGCCTACCA5\n" + ] + } + ], + "source": [ + "print(pcr_product.figure())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Designing Primers and Calculating Tm in pydna\n", + "\n", + "`pydna` also provides the `primer_design` method to design primer sequences based on the desired pcr product and the template sequence's melting temperature (Tm). The `primer_design` method can be imported from the `pydna.design` module, and needs the user to supply the PCR template sequence (as a `Dseqrecord` object) and the Tm. The template sequence should be given as the first parameter, and the Tm give through the `target_tm=` argument, as demonstrated below. 
If you have no specific Tm in mind, the method uses the default Tm of 55 degrees Celsius.\n", + "\n", + "Note that in the following example below, I used zero-based indexing on the `Dseqrecord` to find the sequence of my example gene, which I would like to clone via PCR. Please refer to the `Dseq` page for more information on how to index a sequence. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS example_gene 45 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_f45 example_gene_r45 example_gene.\n", + "ACCESSION example_gene\n", + "VERSION example_gene\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 1..45\n", + " /label=\"example_gene\"\n", + " primer_bind 1..16\n", + " /label=\"f45\"\n", + " /PCR_conditions=\"primer sequence:ATTCCTGCAGAGTACA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(29..45)\n", + " /label=\"r45\"\n", + " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCT\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 attcctgcag agtacatcaa ttctatgaag atatcttcgg atggt\n", + "//\n" + ] + } + ], + "source": [ + "from pydna.design import primer_design\n", + "\n", + "#Designing the primers\n", + "primers = primer_design(record[6:51], target_tm=50.0)\n", + "\n", + "#Printing the output\n", + "print(primers.format(\"gb\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The formula for primer design in `pydna` is based on the Tm formula from Rychlik et al (1990), found [here](http://www.ncbi.nlm.nih.gov/pubmed/2243783). Additional information on calculating Tm can be found in the \"Calculating Tm\" section below.\n", + "\n", + "The forward and reverse primer sequences are printed in the features list of the `Amplicon` object. 
Note how the feature representing the example gene is retained, as appropriate. \n", + "\n", + "If you already have a forward / reverse primer, `primer_design` also allows this information to be taken as arguments. `fp` specifies the forward primer, `rp` specifies the reverse primer. `fp` and `rp` should be given as `Primer` class objects, which should be imported from `pydna` too. \n", + "\n", + "For instance, if I already have a forward primer containing an EcoRI restriction site, and I aim to generate a reverse primer of a similar Tm, I can apply the following code: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS example_gene 51 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_forward_primer example_gene_r45 example_gene.\n", + "ACCESSION example_gene\n", + "VERSION example_gene\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 1..45\n", + " /label=\"example_gene\"\n", + " primer_bind 1..19\n", + " /label=\"f45\"\n", + " /PCR_conditions=\"primer sequence:GAATTCATTCCTGCAGAGTACATCA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(26..45)\n", + " /label=\"r45\"\n", + " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCTTCA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 gaattcattc ctgcagagta catcaattct atgaagatat cttcggatgg t\n", + "//\n" + ] + } + ], + "source": [ + "from pydna.primer import Primer\n", + "\n", + "forward_primer = Primer(\"GAATTCATTCCTGCAGAGTACATCA\", id=\"forward_primer\")\n", + "\n", + "primers_sixfive = primer_design(record[6:51], fp = forward_primer)\n", + "\n", + "print(primers_sixfive.format(\"gb\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calculating Tm\n", + "\n", + "`pydna` comes with 
some functions to calculate Tms. The default function `tm_default` used is the previously mentioned one by Rychlik et al (1990), which takes a string as input. Another function derive from the Tm calculation adapted for primers using polymerases with a DNA binding domain (e.g Phusion polymerase). The default values for Tm calculation, including primer concentration, buffer strengths, and more, can also be modified through arguments in the `tm_default` method. Please refer to the `pydna.tm` module docstring for more information. An example is provided with a pair of primers; the temperature is given in degrees celcius." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "55.901005046706075\n", + "55.841913263215304\n" + ] + } + ], + "source": [ + "from pydna.tm import tm_default\n", + "\n", + "# Example Tm calculation for a pair of primers\n", + "primer_f = \"ATTCCTGCAGAGTACATCA\"\n", + "primer_r = \"ACCATCCGAAGATATCTTCA\"\n", + "tm_f = tm_default(primer_f)\n", + "tm_r = tm_default(primer_r)\n", + "\n", + "print(tm_f)\n", + "print(tm_r)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/Restrict_Ligate_Cloning.ipynb b/docs/notebooks/Restrict_Ligate_Cloning.ipynb index 45afbefe..978469e5 100644 --- a/docs/notebooks/Restrict_Ligate_Cloning.ipynb +++ b/docs/notebooks/Restrict_Ligate_Cloning.ipynb @@ -1,322 +1,322 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Restriction and Ligation \n", - "> Visit 
the full library documentation [here](https://bjornfjohansson.github.io/pydna/)\n", - "\n", - "In this page, we explore how to use pydna to cut, ligate, circularise DNA sequences. pydna works in conjugation with the `Bio.Restriction` module to apply a vast variety of restriction enzymes for cutting, whose module documentations can be found [here](https://biopython.org/DIST/docs/cookbook/Restriction.html).\n", - "\n", - "## Cutting with one or more restriction enzymes\n", - "\n", - "Restriction enzymes recognise specific DNA sequences and cut them, leaving sticky ends or blunt ends. To cut a sequence using `pydna`, we can use the `cut` method on a `Dseqrecord` object. Here is an example showing how to use the `cut` method to genenrate EcoRI restriction digests. The record includes a 338bp circular sequence, with an example gene feature." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: False\n", - "size: 338\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 1\n", - "/molecule_type=DNA\n", - "Dseq(-338)\n", - "AATTCTTC..TGTG \n", - " GAAG..ACACTTAA\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.parsers import parse\n", - "from Bio.Restriction import EcoRI\n", 
- "\n", - "# Create a Dseqrecord with your FASTA/GenBank file\n", - "path = \"./sample_seq.gb\"\n", - "record = parse(path)[0]\n", - "\n", - "# Cut with a single enzyme\n", - "cut_records = record.cut(EcoRI)\n", - "\n", - "# Display the resulting fragments\n", - "for frag in cut_records:\n", - " print(frag)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The circular `Dseqrecord` is cut into a linear `Dseqrecord` object, since there is only one EcoRI recognition site. `Dseqrecord` also shows the 5' sticky end after cutting.\n", - "\n", - "The sequence can also be cut with multiple restriction enzymes, into multiple linear DNA sequences. We can simply import all the restriction enzymes, and use the cut method as normal." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Dseqrecord\n", - "circular: False\n", - "size: 51\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-51)\n", - "ATCT..TGTG \n", - "TAGA..ACACTTAA\n", - "\n", - "\n", - "Dseqrecord\n", - "circular: False\n", - "size: 214\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-214)\n", - "AATTCTTC..TGAT\n", - " GAAG..ACTA\n", - "\n", - "\n", - "Dseqrecord\n", - "circular: False\n", - "size: 73\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-73)\n", - "ATCT..AGAT\n", - "TAGA..TCTA\n", - "\n" - ] - } - ], - "source": [ - "from Bio.Restriction import EcoRV\n", - "\n", - "# Cut with a multiple enzymes\n", - "multi_cut_records = record.cut(EcoRI, EcoRV)\n", - "\n", - "# Display the resulting fragments\n", - "for frag in multi_cut_records:\n", - " print()\n", - " print(frag)\n", - " print()" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "There are two EcoRV recognition sites in `sample_seq`, and coupled with the one EcoRI recognition site, three DNA fragments are returned. Note how `Dseqrecord` returns the blunt end after EcoRV cuts. \n", - "\n", - "You can model any, and and number of, enzymes with the `cut` method and `Bio.Restriction` module. This makes `pydna` a quick and powerful method to plan your molecular cloning experiments, for instance to check the restriction digests of a 10kb plasmid with multiple enzymes. `cut` is also a method of the `Dseq` class, so `Dseq`s can be used as well. \n", - "\n", - "## Ligating fragments\n", - "\n", - "After cutting a DNA sequence, you can ligate the fragments back together in `pydna` using the `+` operator on `Dseqrecord` or `Dseq` objects. Ligation can occur via complementary sticky ends or blunt ends. For instance, we can select the first and second fragments from `multi_cut_records` via indexing, and then ligate sticky ends produced by EcoRI to make a single linear sequence." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: False\n", - "size: 261\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-261)\n", - "ATCT..TGAT\n", - "TAGA..ACTA\n" - ] - } - ], - "source": [ - "ligated_product = multi_cut_records[0] + multi_cut_records[1]\n", - "print(ligated_product)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also join blunt ends in a similar way. Note that the sticky-ends must be a perfect match to join. If `+` ligation (or any other method, really) doesn't work, make sure that:\n", - "\n", - "1. you are indeed performing the operation on `Dseqrecord` objects, as opposed to other data types (e.g lists, strings, etc)\n", - "2. 
`Dseqrecord` and the correct enzyme name (with correct roman numeral spelling) has been imported. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Circularizing fragments\n", - "\n", - "To circularize a cut DNA sequence use the `looped` method, which returns a new sequence object.\n", - "\n", - "🚨🚨 **VERY IMPORTANT** 🚨🚨 `.looped()` method does not act in place, so a new variable should be created to store the new circularised sequence, as shown in the following example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "is ligated_product circular? False\n", - "is circular_record circular? True\n", - "\n", - "Dseqrecord\n", - "circular: True\n", - "size: 261\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(o261)\n", - "ATCT..TGAT\n", - "TAGA..ACTA\n" - ] - } - ], - "source": [ - "circular_record = ligated_product.looped()\n", - "\n", - "print('is ligated_product circular?', ligated_product.circular)\n", - "print('is circular_record circular?', circular_record.circular)\n", - "print()\n", - "\n", - "print(circular_record)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extra Notes: What happens to features when cutting/ligating?\n", - "\n", - "A feature is removed from a `Dseqrecord` if the features is truncated by the cut. For instance, the example_gene feature is removed from the record after cutting `record` with PstI, which has recognition site within example_gene. within the cutand if the feature is completely within the cut, it is retained. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: False\n", - "size: 222\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-222)\n", - " GAGT..TAACTGCA\n", - "ACGTCTCA..ATTG \n" - ] - } - ], - "source": [ - "from Bio.Restriction import PstI\n", - "\n", - "cut_record2 = record.cut(PstI)\n", - "\n", - "print(cut_record2[0])\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "However, if a cut does not overlap with the feature, the feature is retained on the `Dseqrecord`. For instance, if we go back to the first example given by the EcoRI cut, example_gene has been retained after cutting. For more information on Features, please refer to the `Dseq_Feature` documentations." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Restriction and Ligation \n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "In this page, we explore how to use pydna to cut, ligate, circularise DNA sequences. 
pydna works in conjunction with the `Bio.Restriction` module to apply a vast variety of restriction enzymes for cutting, whose module documentations can be found [here](https://biopython.org/DIST/docs/cookbook/Restriction.html).\n", + "\n", + "## Cutting with one or more restriction enzymes\n", + "\n", + "Restriction enzymes recognise specific DNA sequences and cut them, leaving sticky ends or blunt ends. To cut a sequence using `pydna`, we can use the `cut` method on a `Dseqrecord` object. Here is an example showing how to use the `cut` method to generate EcoRI restriction digests. The record includes a 338bp circular sequence, with an example gene feature." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: False\n", + "size: 338\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 1\n", + "/molecule_type=DNA\n", + "Dseq(-338)\n", + "AATTCTTC..TGTG \n", + " GAAG..ACACTTAA\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.parsers import parse\n", + "from Bio.Restriction import EcoRI\n", + "\n", + "# Create a Dseqrecord with your FASTA/GenBank file\n", + "path = \"./sample_seq.gb\"\n", + "record = parse(path)[0]\n", + "\n", + "# Cut with a single enzyme\n", + 
"cut_records = record.cut(EcoRI)\n", + "\n", + "# Display the resulting fragments\n", + "for frag in cut_records:\n", + " print(frag)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The circular `Dseqrecord` is cut into a linear `Dseqrecord` object, since there is only one EcoRI recognition site. `Dseqrecord` also shows the 5' sticky end after cutting.\n", + "\n", + "The sequence can also be cut with multiple restriction enzymes, into multiple linear DNA sequences. We can simply import all the restriction enzymes, and use the cut method as normal." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dseqrecord\n", + "circular: False\n", + "size: 51\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-51)\n", + "ATCT..TGTG \n", + "TAGA..ACACTTAA\n", + "\n", + "\n", + "Dseqrecord\n", + "circular: False\n", + "size: 214\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-214)\n", + "AATTCTTC..TGAT\n", + " GAAG..ACTA\n", + "\n", + "\n", + "Dseqrecord\n", + "circular: False\n", + "size: 73\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-73)\n", + "ATCT..AGAT\n", + "TAGA..TCTA\n", + "\n" + ] + } + ], + "source": [ + "from Bio.Restriction import EcoRV\n", + "\n", + "# Cut with a multiple enzymes\n", + "multi_cut_records = record.cut(EcoRI, EcoRV)\n", + "\n", + "# Display the resulting fragments\n", + "for frag in multi_cut_records:\n", + " print()\n", + " print(frag)\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are two EcoRV recognition sites in `sample_seq`, and coupled with the one EcoRI recognition site, three DNA fragments are 
returned. Note how `Dseqrecord` returns the blunt end after EcoRV cuts. \n", + "\n", + "You can model any, and any number of, enzymes with the `cut` method and `Bio.Restriction` module. This makes `pydna` a quick and powerful method to plan your molecular cloning experiments, for instance to check the restriction digests of a 10kb plasmid with multiple enzymes. `cut` is also a method of the `Dseq` class, so `Dseq`s can be used as well. \n", + "\n", + "## Ligating fragments\n", + "\n", + "After cutting a DNA sequence, you can ligate the fragments back together in `pydna` using the `+` operator on `Dseqrecord` or `Dseq` objects. Ligation can occur via complementary sticky ends or blunt ends. For instance, we can select the first and second fragments from `multi_cut_records` via indexing, and then ligate sticky ends produced by EcoRI to make a single linear sequence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: False\n", + "size: 261\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-261)\n", + "ATCT..TGAT\n", + "TAGA..ACTA\n" + ] + } + ], + "source": [ + "ligated_product = multi_cut_records[0] + multi_cut_records[1]\n", + "print(ligated_product)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also join blunt ends in a similar way. Note that the sticky-ends must be a perfect match to join. If `+` ligation (or any other method, really) doesn't work, make sure that:\n", + "\n", + "1. you are indeed performing the operation on `Dseqrecord` objects, as opposed to other data types (e.g. lists, strings, etc.)\n", + "2. `Dseqrecord` and the correct enzyme name (with correct roman numeral spelling) have been imported. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Circularizing fragments\n", + "\n", + "To circularize a cut DNA sequence use the `looped` method, which returns a new sequence object.\n", + "\n", + "🚨🚨 **VERY IMPORTANT** 🚨🚨 `.looped()` method does not act in place, so a new variable should be created to store the new circularised sequence, as shown in the following example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "is ligated_product circular? False\n", + "is circular_record circular? True\n", + "\n", + "Dseqrecord\n", + "circular: True\n", + "size: 261\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(o261)\n", + "ATCT..TGAT\n", + "TAGA..ACTA\n" + ] + } + ], + "source": [ + "circular_record = ligated_product.looped()\n", + "\n", + "print('is ligated_product circular?', ligated_product.circular)\n", + "print('is circular_record circular?', circular_record.circular)\n", + "print()\n", + "\n", + "print(circular_record)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extra Notes: What happens to features when cutting/ligating?\n", + "\n", + "A feature is removed from a `Dseqrecord` if the feature is truncated by the cut. For instance, the example_gene feature is removed from the record after cutting `record` with PstI, which has a recognition site within example_gene. Conversely, if the feature lies completely within one of the resulting fragments, it is retained. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: False\n", + "size: 222\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-222)\n", + " GAGT..TAACTGCA\n", + "ACGTCTCA..ATTG \n" + ] + } + ], + "source": [ + "from Bio.Restriction import PstI\n", + "\n", + "cut_record2 = record.cut(PstI)\n", + "\n", + "print(cut_record2[0])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, if a cut does not overlap with the feature, the feature is retained on the `Dseqrecord`. For instance, if we go back to the first example given by the EcoRI cut, example_gene has been retained after cutting. For more information on Features, please refer to the `Dseq_Feature` documentations." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/primer_design.ipynb b/docs/notebooks/primer_design.ipynb index 859bb2af..5991f67d 100644 --- a/docs/notebooks/primer_design.ipynb +++ b/docs/notebooks/primer_design.ipynb @@ -9,6 +9,15 @@ "You can use `pydna` for primer design in different contexts, let's start with some basic primer functionalities." 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/docs/notebooks/readme_example.ipynb b/docs/notebooks/readme_example.ipynb index 0aa31e1f..022a4076 100644 --- a/docs/notebooks/readme_example.ipynb +++ b/docs/notebooks/readme_example.ipynb @@ -8,7 +8,7 @@ "\n", "This notebook contains the example shown in the README file.\n", "\n", - "\n", + "\n", " \"Open\n", "" ] From 15877cec522ea73819e1cdae7a905b9e6a3d7705 Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Wed, 15 Jan 2025 16:07:31 +0000 Subject: [PATCH 2/8] closes #333 --- README.md | 12 +- docs/README.md | 2 +- docs/cookbook/cookbook.ipynb | 2 +- docs/getting_started.md | 2 +- docs/notebooks/CRISPR.ipynb | 2 +- docs/notebooks/Dseq.ipynb | 2 +- docs/notebooks/Dseq_Features.ipynb | 2 +- docs/notebooks/Example_CRISPR.ipynb | 544 ++++++------- docs/notebooks/Example_Gibson.ipynb | 2 +- docs/notebooks/Example_Restriction.ipynb | 2 +- docs/notebooks/Gibson.ipynb | 2 +- docs/notebooks/Importing_Seqs.ipynb | 2 +- docs/notebooks/PCR.ipynb | 2 +- docs/notebooks/Restrict_Ligate_Cloning.ipynb | 2 +- docs/notebooks/readme_example.ipynb | 734 +++++++++--------- pyproject.toml | 14 +- scripts/conda-build/meta (copy).yaml | 6 +- scripts/conda-build/meta (copy2).yaml | 6 +- scripts/conda-build/meta2.yaml | 6 +- .../files_for_annotation_function/post.md | 2 +- src/pydna/__init__.py | 8 +- src/pydna/dseqrecord.py | 6 +- src/pydna/utils.py | 2 +- tests/test_module_amplify.py | 2 +- tests/test_module_dseq.py | 8 +- tests/test_module_dseqrecord.py | 2 +- tests/test_module_utils.py | 4 +- 27 files changed, 691 insertions(+), 689 deletions(-) diff --git a/README.md b/README.md index 5def6106..951a5c6c 100755 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # ![icon](docs/_static/pydna.resized.png) pydna -| [![Tests & 
Coverage](https://github.com/BjornFJohansson/pydna/actions/workflows/pydna_test_and_coverage_workflow.yml/badge.svg?branch=dev_bjorn)](https://github.com/BjornFJohansson/pydna/actions/workflows/pydna_test_and_coverage_workflow.yml) | [![codecov](https://codecov.io/gh/BjornFJohansson/pydna/branch/master/graph/badge.svg)](https://codecov.io/gh/BjornFJohansson/pydna/branch/master) | [![PyPI version](https://badge.fury.io/py/pydna.svg)](https://badge.fury.io/py/pydna) | [![Google group : pydna](https://img.shields.io/badge/Google%20Group-pydna-blue.svg)](https://groups.google.com/g/pydna) | -| -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | -| [![Documentation Status](https://github.com/BjornFJohansson/pydna/actions/workflows/publish-docs.yml/badge.svg)](https://github.com/BjornFJohansson/pydna/actions/workflows/publish-docs.yml) | [![GitHub issues](https://img.shields.io/github/issues/BjornFJohansson/pydna.svg)](https://github.com/BjornFJohansson/pydna/issues) | [![Anaconda-Server Badge2](https://anaconda.org/bjornfjohansson/pydna/badges/license.svg)](https://anaconda.org/bjornfjohansson/pydna) | [![GitHub stars](https://img.shields.io/github/stars/BjornFJohansson/pydna.svg)](https://github.com/BjornFJohansson/pydna/stargazers) | +| [![Tests & 
Coverage](https://github.com/pydna-group/pydna/actions/workflows/pydna_test_and_coverage_workflow.yml/badge.svg?branch=dev_bjorn)](https://github.com/pydna-group/pydna/actions/workflows/pydna_test_and_coverage_workflow.yml) | [![codecov](https://codecov.io/gh/BjornFJohansson/pydna/branch/master/graph/badge.svg)](https://codecov.io/gh/BjornFJohansson/pydna/branch/master) | [![PyPI version](https://badge.fury.io/py/pydna.svg)](https://badge.fury.io/py/pydna) | [![Google group : pydna](https://img.shields.io/badge/Google%20Group-pydna-blue.svg)](https://groups.google.com/g/pydna) | +| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | +| [![Documentation Status](https://github.com/pydna-group/pydna/actions/workflows/publish-docs.yml/badge.svg)](https://github.com/pydna-group/pydna/actions/workflows/publish-docs.yml) | [![GitHub issues](https://img.shields.io/github/issues/BjornFJohansson/pydna.svg)](https://github.com/pydna-group/pydna/issues) | [![Anaconda-Server Badge2](https://anaconda.org/bjornfjohansson/pydna/badges/license.svg)](https://anaconda.org/bjornfjohansson/pydna) | [![GitHub stars](https://img.shields.io/github/stars/BjornFJohansson/pydna.svg)](https://github.com/pydna-group/pydna/stargazers) | @@ -487,7 +487,7 @@ supported python versions. 
### Building the documentation locally 📚 Documentation is built using [Sphinx](http://www.sphinx-doc.org/) from [docstrings](https://www.python.org/dev/peps/pep-0257/) -using a GitHub [action](https://github.com/BjornFJohansson/pydna/actions/workflows/publish-docs.yml). +using a GitHub [action](https://github.com/pydna-group/pydna/actions/workflows/publish-docs.yml). The [numpy](https://www.numpy.org) [docstring format](https://numpy.org/doc/stable/dev/howto-docs.html#docstring-intro) is used. Below the commands to run a local sphinx server that auto-updated when files are changed. @@ -505,7 +505,7 @@ More info about how to contribute to the documentation can be found [here](docs/ ## Release process 🚀 -See the [releases](https://github.com/BjornFJohansson/pydna/releases) for changes and releases. +See the [releases](https://github.com/pydna-group/pydna/releases) for changes and releases. The build workflow builds a PyPI packages using poetry. This workflow is triggered by publishing a Github release manually from the Github web interface. @@ -535,4 +535,4 @@ In Cold Spring Harbor Laboratory (p. 2021.01.17.427048). [DOI](https://doi.org/1 [An Automated Protein Synthesis Pipeline with Transcriptic and Snakemake](http://blog.booleanbiotech.com/transcriptic_protein_synthesis_pipeline.html) -and other projects on [github](https://github.com/BjornFJohansson/pydna/network/dependents?package_id=UGFja2FnZS01MjQ2MjYzNQ%3D%3D) +and other projects on [github](https://github.com/pydna-group/pydna/network/dependents?package_id=UGFja2FnZS01MjQ2MjYzNQ%3D%3D) diff --git a/docs/README.md b/docs/README.md index fa6f252b..3975d263 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,7 +1,7 @@ # Documentation Documentation is built using [Sphinx](http://www.sphinx-doc.org/) from [docstrings](https://www.python.org/dev/peps/pep-0257/) -using a GitHub [action](https://github.com/BjornFJohansson/pydna/actions/workflows/publish-docs.yml). 
+using a GitHub [action](https://github.com/pydna-group/pydna/actions/workflows/publish-docs.yml). The [numpy](www.numpy.org) [docstring format](https://numpy.org/doc/stable/dev/howto-docs.html#docstring-intro) is used. Below the commands to run a local sphinx server that auto-updated when files are changed. diff --git a/docs/cookbook/cookbook.ipynb b/docs/cookbook/cookbook.ipynb index ab8748be..6605dcc3 100644 --- a/docs/cookbook/cookbook.ipynb +++ b/docs/cookbook/cookbook.ipynb @@ -21,7 +21,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\n", + "\n", " \"Open\n", "" ] diff --git a/docs/getting_started.md b/docs/getting_started.md index c5f6c7fc..961e3959 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -2,7 +2,7 @@ The best way to get started is to follow our tutorial notebooks, you can find them in the -repository folder [docs/notebooks](https://github.com/BjornFJohansson/pydna/tree/main/docs/notebooks) and +repository folder [docs/notebooks](https://github.com/pydna-group/pydna/tree/master/docs/notebooks) and are reproduced here so they can serve as documentation. 
You can run the notebooks locally, but also on the browser using Google Colab, simply click in the badge of diff --git a/docs/notebooks/CRISPR.ipynb b/docs/notebooks/CRISPR.ipynb index 6fdd7525..7c223dbd 100644 --- a/docs/notebooks/CRISPR.ipynb +++ b/docs/notebooks/CRISPR.ipynb @@ -33,7 +33,7 @@ "if 'google.colab' in sys.modules:\n", " %%capture\n", " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", " # Install pip version instead (uncomment to install)\n", " # !pip install pydna\n" ] diff --git a/docs/notebooks/Dseq.ipynb b/docs/notebooks/Dseq.ipynb index ce4a1401..1073ef34 100644 --- a/docs/notebooks/Dseq.ipynb +++ b/docs/notebooks/Dseq.ipynb @@ -28,7 +28,7 @@ "if 'google.colab' in sys.modules:\n", " %%capture\n", " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", " # Install pip version instead (uncomment to install)\n", " # !pip install pydna\n" ] diff --git a/docs/notebooks/Dseq_Features.ipynb b/docs/notebooks/Dseq_Features.ipynb index 8dd0c903..68d36576 100644 --- a/docs/notebooks/Dseq_Features.ipynb +++ b/docs/notebooks/Dseq_Features.ipynb @@ -35,7 +35,7 @@ "if 'google.colab' in sys.modules:\n", " %%capture\n", " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", " # Install pip version instead (uncomment to install)\n", " # !pip install pydna\n" ] diff --git a/docs/notebooks/Example_CRISPR.ipynb b/docs/notebooks/Example_CRISPR.ipynb index f3cb4254..6251d842 100644 --- a/docs/notebooks/Example_CRISPR.ipynb +++ 
b/docs/notebooks/Example_CRISPR.ipynb @@ -1,273 +1,273 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Implementation of Oligonucleotide-based CRISPR-Cas9 toolbox for efficient engineering of Komagataella phaffii \n", - "\n", - "\n", - "In this example we wanted to give a real life intuition on how to use the module in practice. \n", - "\n", - "For this purpose we have chosen to use the oligonucleotide-based CRISPR-Cas9 toolbox that i described \n", - "here by Strucko et al 2024, in the industrially relevant K. phaffi production organism: \n", - "\n", - "https://academic.oup.com/femsyr/article/doi/10.1093/femsyr/foae026/7740463?login=false " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from IPython.display import Image\n", - "Image(url=\"https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/femsyr/24/10.1093_femsyr_foae026/1/m_foae026fig3.jpeg?Expires=1730974846&Signature=iBKvkhkUn1823IljQ~1uFEnKO0VqWrwiXADvCwQLz6Yv8yDEAFkgt~tsLrXKFTmGYIq3ZINcj5a5yNgs4cP4NeCvRcQh7Ad~1ZejIwNrjqw51CJhGcZWPzz~NDr93QVLZZd2Re41cJNFKFmEu756KxrHQxwKTQe2QPMPfiKBvhvo8J28PERj3vNjZ3LQRsFp9qUPpdsZEyWIiNY92jsuy448YyuaGCgaC2ExGDLeuArTEJmq8gtb0QnTPV0dEdtoxIfZpgavdvO~QyqikjCLj6hebUYU1lH7StuS8oqCQE82CXO0IUcjYF6m2Lb0evXhqdLDQe90M-NrKjzNRmBA0A__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Figure 1. oligo assisted repair in K. phaffi. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "- Basically we can make two cuts in the genome, and repair it with an oligo (Figure 1A, 1B).\n", - "\n", - "\n", - "- We can start by loading in our target. Here we have integrated LAC12 in our K. phaffi strain but want to knock it out. \n", - "\n", - "\n", - "- Let's see how this can be implemented in pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import the gene we are going to work with" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: False\n", - "size: 7127\n", - "ID: X06997.1\n", - "Name: X06997\n", - "Description: Kluyveromyces lactis LAC12 gene for lactose permease\n", - "Number of features: 8\n", - "/molecule_type=DNA\n", - "/topology=linear\n", - "/data_file_division=PLN\n", - "/date=25-JUL-2016\n", - "/accessions=['X06997']\n", - "/sequence_version=1\n", - "/keywords=['lactose permease', 'unidentified reading frame']\n", - "/source=Kluyveromyces lactis\n", - "/organism=Kluyveromyces lactis\n", - "/taxonomy=['Eukaryota', 'Fungi', 'Dikarya', 'Ascomycota', 'Saccharomycotina', 'Saccharomycetes', 'Saccharomycetales', 'Saccharomycetaceae', 'Kluyveromyces']\n", - "/references=[Reference(title='Primary structure of the lactose permease gene from the yeast Kluyveromyces lactis. 
Presence of an unusual transcript structure', ...), Reference(title='Direct Submission', ...)]\n", - "/comment=the sequence submitted starts from the 5'end of LAC4 gene but goes\n", - "to the opposite direction; therefore, base number 1 is -1199 of\n", - "LAC4 gene; for LAC4 gene seq. see\n", - "Mol. Cell. Biol. (1987)7,4369-4376.\n", - "Dseq(-7127)\n", - "GCGA..TTCG\n", - "CGCT..AAGC\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.crispr import cas9, protospacer\n", - "from pydna.genbank import Genbank\n", - "\n", - "# initalize your favourite gene\n", - "gb = Genbank(\"myself@email.com\") # Tell Genbank who you are!\n", - "gene = gb.nucleotide(\"X06997\") # Kluyveromyces lactis LAC12 gene for lactose permease that have been integrated into K. phaffi\n", - "target_dseq = Dseqrecord(gene)\n", - "print(target_dseq)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next we have chosen some guides and can add them to our cas9 enzymes and simulate the cuts." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cutting with guide 1: (Dseqrecord(-135), Dseqrecord(-6992))\n", - "cutting with guide 2: (Dseqrecord(-6793), Dseqrecord(-334))\n" - ] - } - ], - "source": [ - "\n", - "# Choose guides\n", - "guides = [\"CCCTAAGTCCTTTGAAGATT\", \"TATTATTTTGAGGTGCTTTA\"]\n", - "\n", - "# Create an enzyme object with the protospacer\n", - "enzyme = cas9(guides[0])\n", - "\n", - "# Simulate the cut with enzyme1\n", - "print('cutting with guide 1:', target_dseq.cut(enzyme))\n", - "\n", - "# Create an enzyme from the protospacer\n", - "enzyme2 = cas9(guides[1])\n", - "\n", - "# Simulate the cut with enzyme2\n", - "print('cutting with guide 2:', target_dseq.cut(enzyme2))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With these guides I would be able to generate a stable KO with a repair 60/90mer oligo." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "My repair oligo for this experiment : AGGTGAACACACTCTGATGTAGTGCAGTCCCTAAGTCCTTTGAAGTTACGGACTCCTCGACCGATGCCCTTGAGAGCCTTCAACCCAGTC \n", - "My repair oligo for this experiment length : 90 \n" - ] - } - ], - "source": [ - "repair_oligo = target_dseq.cut(enzyme)[0][-45:]+target_dseq.cut(enzyme2)[-1][:45]\n", - "repair_oligo.name = 'My repair oligo for this experiment'\n", - "print(f'{repair_oligo.name} : {repair_oligo.seq} ')\n", - "print(f'{repair_oligo.name} length : {len(repair_oligo.seq)} ')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The final edit gene would look like this in a case of homologous recombination. \n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
name|45\n",
-       "     \\/\n",
-       "     /\\\n",
-       "     45|My repair oligo for this experiment|45\n",
-       "                                            \\/\n",
-       "                                            /\\\n",
-       "                                            45|name
" - ], - "text/plain": [ - "Contig(-469)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.assembly import Assembly\n", - "\n", - "my_KO = Assembly((target_dseq.cut(enzyme)[0],repair_oligo, target_dseq.cut(enzyme2)[-1]), limit = 20 )\n", - "my_assembly_KO, *rest = my_KO.assemble_linear()\n", - "my_assembly_KO" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Implementation of Oligonucleotide-based CRISPR-Cas9 toolbox for efficient engineering of Komagataella phaffii \n", + "\n", + "\n", + "In this example we wanted to give a real life intuition on how to use the module in practice. \n", + "\n", + "For this purpose we have chosen to use the oligonucleotide-based CRISPR-Cas9 toolbox that i described \n", + "here by Strucko et al 2024, in the industrially relevant K. 
phaffi production organism: \n", + "\n", + "https://academic.oup.com/femsyr/article/doi/10.1093/femsyr/foae026/7740463?login=false " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import Image\n", + "Image(url=\"https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/femsyr/24/10.1093_femsyr_foae026/1/m_foae026fig3.jpeg?Expires=1730974846&Signature=iBKvkhkUn1823IljQ~1uFEnKO0VqWrwiXADvCwQLz6Yv8yDEAFkgt~tsLrXKFTmGYIq3ZINcj5a5yNgs4cP4NeCvRcQh7Ad~1ZejIwNrjqw51CJhGcZWPzz~NDr93QVLZZd2Re41cJNFKFmEu756KxrHQxwKTQe2QPMPfiKBvhvo8J28PERj3vNjZ3LQRsFp9qUPpdsZEyWIiNY92jsuy448YyuaGCgaC2ExGDLeuArTEJmq8gtb0QnTPV0dEdtoxIfZpgavdvO~QyqikjCLj6hebUYU1lH7StuS8oqCQE82CXO0IUcjYF6m2Lb0evXhqdLDQe90M-NrKjzNRmBA0A__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Figure 1. oligo assisted repair in K. phaffi. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "- Basically we can make two cuts in the genome, and repair it with an oligo (Figure 1A, 1B).\n", + "\n", + "\n", + "- We can start by loading in our target. Here we have integrated LAC12 in our K. phaffi strain but want to knock it out. 
\n", + "\n", + "\n", + "- Let's see how this can be implemented in pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import the gene we are going to work with" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: False\n", + "size: 7127\n", + "ID: X06997.1\n", + "Name: X06997\n", + "Description: Kluyveromyces lactis LAC12 gene for lactose permease\n", + "Number of features: 8\n", + "/molecule_type=DNA\n", + "/topology=linear\n", + "/data_file_division=PLN\n", + "/date=25-JUL-2016\n", + "/accessions=['X06997']\n", + "/sequence_version=1\n", + "/keywords=['lactose permease', 'unidentified reading frame']\n", + "/source=Kluyveromyces lactis\n", + "/organism=Kluyveromyces lactis\n", + "/taxonomy=['Eukaryota', 'Fungi', 'Dikarya', 'Ascomycota', 'Saccharomycotina', 'Saccharomycetes', 'Saccharomycetales', 'Saccharomycetaceae', 'Kluyveromyces']\n", + "/references=[Reference(title='Primary structure of the lactose permease gene from the yeast Kluyveromyces lactis. Presence of an unusual transcript structure', ...), Reference(title='Direct Submission', ...)]\n", + "/comment=the sequence submitted starts from the 5'end of LAC4 gene but goes\n", + "to the opposite direction; therefore, base number 1 is -1199 of\n", + "LAC4 gene; for LAC4 gene seq. see\n", + "Mol. Cell. Biol. 
(1987)7,4369-4376.\n", + "Dseq(-7127)\n", + "GCGA..TTCG\n", + "CGCT..AAGC\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.crispr import cas9, protospacer\n", + "from pydna.genbank import Genbank\n", + "\n", + "# initalize your favourite gene\n", + "gb = Genbank(\"myself@email.com\") # Tell Genbank who you are!\n", + "gene = gb.nucleotide(\"X06997\") # Kluyveromyces lactis LAC12 gene for lactose permease that have been integrated into K. phaffi\n", + "target_dseq = Dseqrecord(gene)\n", + "print(target_dseq)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we have chosen some guides and can add them to our cas9 enzymes and simulate the cuts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cutting with guide 1: (Dseqrecord(-135), Dseqrecord(-6992))\n", + "cutting with guide 2: (Dseqrecord(-6793), Dseqrecord(-334))\n" + ] + } + ], + "source": [ + "\n", + "# Choose guides\n", + "guides = [\"CCCTAAGTCCTTTGAAGATT\", \"TATTATTTTGAGGTGCTTTA\"]\n", + "\n", + "# Create an enzyme object with the protospacer\n", + "enzyme = cas9(guides[0])\n", + "\n", + "# Simulate the cut with enzyme1\n", + "print('cutting with guide 1:', target_dseq.cut(enzyme))\n", + "\n", + "# Create an enzyme from the protospacer\n", + "enzyme2 = cas9(guides[1])\n", + "\n", + "# Simulate the cut with enzyme2\n", + "print('cutting with guide 2:', target_dseq.cut(enzyme2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With these guides I would be able to generate a stable KO with a repair 60/90mer oligo." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My repair oligo for this experiment : AGGTGAACACACTCTGATGTAGTGCAGTCCCTAAGTCCTTTGAAGTTACGGACTCCTCGACCGATGCCCTTGAGAGCCTTCAACCCAGTC \n", + "My repair oligo for this experiment length : 90 \n" + ] + } + ], + "source": [ + "repair_oligo = target_dseq.cut(enzyme)[0][-45:]+target_dseq.cut(enzyme2)[-1][:45]\n", + "repair_oligo.name = 'My repair oligo for this experiment'\n", + "print(f'{repair_oligo.name} : {repair_oligo.seq} ')\n", + "print(f'{repair_oligo.name} length : {len(repair_oligo.seq)} ')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The final edit gene would look like this in a case of homologous recombination. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
name|45\n",
+                            "     \\/\n",
+                            "     /\\\n",
+                            "     45|My repair oligo for this experiment|45\n",
+                            "                                            \\/\n",
+                            "                                            /\\\n",
+                            "                                            45|name
" + ], + "text/plain": [ + "Contig(-469)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.assembly import Assembly\n", + "\n", + "my_KO = Assembly((target_dseq.cut(enzyme)[0],repair_oligo, target_dseq.cut(enzyme2)[-1]), limit = 20 )\n", + "my_assembly_KO, *rest = my_KO.assemble_linear()\n", + "my_assembly_KO" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/notebooks/Example_Gibson.ipynb b/docs/notebooks/Example_Gibson.ipynb index 490148b8..f99f4f31 100755 --- a/docs/notebooks/Example_Gibson.ipynb +++ b/docs/notebooks/Example_Gibson.ipynb @@ -32,7 +32,7 @@ "if 'google.colab' in sys.modules:\n", " %%capture\n", " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", " # Install pip version instead (uncomment to install)\n", " # !pip install pydna\n" ] diff --git a/docs/notebooks/Example_Restriction.ipynb b/docs/notebooks/Example_Restriction.ipynb index b50edcec..91406719 100755 --- a/docs/notebooks/Example_Restriction.ipynb +++ b/docs/notebooks/Example_Restriction.ipynb @@ -36,7 +36,7 @@ "if 'google.colab' in sys.modules:\n", " %%capture\n", " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", " # Install pip version 
instead (uncomment to install)\n", " # !pip install pydna\n" ] diff --git a/docs/notebooks/Gibson.ipynb b/docs/notebooks/Gibson.ipynb index 5bba334b..0a739436 100644 --- a/docs/notebooks/Gibson.ipynb +++ b/docs/notebooks/Gibson.ipynb @@ -37,7 +37,7 @@ "if 'google.colab' in sys.modules:\n", " %%capture\n", " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", " # Install pip version instead (uncomment to install)\n", " # !pip install pydna\n" ] diff --git a/docs/notebooks/Importing_Seqs.ipynb b/docs/notebooks/Importing_Seqs.ipynb index b606c5ab..16f079fb 100755 --- a/docs/notebooks/Importing_Seqs.ipynb +++ b/docs/notebooks/Importing_Seqs.ipynb @@ -39,7 +39,7 @@ "if 'google.colab' in sys.modules:\n", " %%capture\n", " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", " # Install pip version instead (uncomment to install)\n", " # !pip install pydna\n" ] diff --git a/docs/notebooks/PCR.ipynb b/docs/notebooks/PCR.ipynb index 1ecfe0b4..81c48079 100755 --- a/docs/notebooks/PCR.ipynb +++ b/docs/notebooks/PCR.ipynb @@ -37,7 +37,7 @@ "if 'google.colab' in sys.modules:\n", " %%capture\n", " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", " # Install pip version instead (uncomment to install)\n", " # !pip install pydna\n" ] diff --git a/docs/notebooks/Restrict_Ligate_Cloning.ipynb b/docs/notebooks/Restrict_Ligate_Cloning.ipynb index 978469e5..55fc35f7 100644 --- a/docs/notebooks/Restrict_Ligate_Cloning.ipynb +++ 
b/docs/notebooks/Restrict_Ligate_Cloning.ipynb @@ -34,7 +34,7 @@ "if 'google.colab' in sys.modules:\n", " %%capture\n", " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", " # Install pip version instead (uncomment to install)\n", " # !pip install pydna\n" ] diff --git a/docs/notebooks/readme_example.ipynb b/docs/notebooks/readme_example.ipynb index 022a4076..68992ece 100644 --- a/docs/notebooks/readme_example.ipynb +++ b/docs/notebooks/readme_example.ipynb @@ -1,369 +1,369 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## README Example\n", - "\n", - "This notebook contains the example shown in the README file.\n", - "\n", - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/BjornFJohansson/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(-60)\n", - "\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0m\n", - "TACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTA" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "# Let's create a DNA sequence record, and add a feature to it\n", - "dsr = 
Dseqrecord(\"ATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\")\n", - "dsr.add_feature(x=0, y=60,type=\"gene\", label=\"my_gene\") # We add a feature to highlight the sequence as a gene\n", - "dsr.figure()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS name 60 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " misc 1..60\n", - " /type=\"gene\"\n", - " /label=\"my_gene\"\n", - "ORIGIN\n", - " 1 atgcaaacag taatgatgga tgacattcaa agcactgatt ctattgctga aaaagataat\n", - "//\n" - ] - } - ], - "source": [ - "# This is how it would look as a genbank file\n", - "print(dsr.format(\"genbank\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "forward primer: ATGCAAACAGTAATGATGGA\n", - "reverse primer: ATTATCTTTTTCAGCAATAGAATCA\n" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## README Example\n", + "\n", + "This notebook contains the example shown in the README file.\n", + "\n", + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %%capture\n", + " # Install the current development version of pydna (comment to install pip version)\n", + " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", + " # Install pip version instead (uncomment to install)\n", + " # !pip install pydna" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(-60)\n", + 
"\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0m\n", + "TACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTA" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "# Let's create a DNA sequence record, and add a feature to it\n", + "dsr = Dseqrecord(\"ATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\")\n", + "dsr.add_feature(x=0, y=60,type=\"gene\", label=\"my_gene\") # We add a feature to highlight the sequence as a gene\n", + "dsr.figure()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS name 60 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " misc 1..60\n", + " /type=\"gene\"\n", + " /label=\"my_gene\"\n", + "ORIGIN\n", + " 1 atgcaaacag taatgatgga tgacattcaa agcactgatt ctattgctga aaaagataat\n", + "//\n" + ] + } + ], + "source": [ + "# This is how it would look as a genbank file\n", + "print(dsr.format(\"genbank\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "forward primer: ATGCAAACAGTAATGATGGA\n", + "reverse primer: ATTATCTTTTTCAGCAATAGAATCA\n" + ] + }, + { + "data": { + "text/plain": [ + "5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n", + " |||||||||||||||||||||||||\n", + " 3ACTAAGATAACGACTTTTTCTATTA5\n", + "5ATGCAAACAGTAATGATGGA3\n", + " ||||||||||||||||||||\n", + "3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Now let's design primers to amplify it\n", + "from pydna.design import 
primer_design\n", + "# limit is the minimum length of the primer, target_tm is the desired melting temperature of the primer\n", + "amplicon = primer_design(dsr, limit=13, target_tm=55)\n", + "# Let's print the primers, and a figure that shows where they align with the template sequence\n", + "print(\"forward primer:\", amplicon.forward_primer.seq)\n", + "print(\"reverse primer:\", amplicon.reverse_primer.seq)\n", + "amplicon.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " 5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n", + " |||||||||||||||||||||||||\n", + " 3ACTAAGATAACGACTTTTTCTATTACCTAGGtttt5\n", + "5ccccGGATCCATGCAAACAGTAATGATGGA3\n", + " ||||||||||||||||||||\n", + " 3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's say we don't want to just amplify it, but we want to add restriction sites to it!\n", + "\n", + "from pydna.amplify import pcr\n", + "# We add the restriction sites to the primers\n", + "forward_primer = \"ccccGGATCC\" + amplicon.forward_primer\n", + "reverse_primer = \"ttttGGATCC\" + amplicon.reverse_primer\n", + "\n", + "# We do the PCR\n", + "pcr_product = pcr(forward_primer, reverse_primer, dsr)\n", + "# The PCR product is of class `Amplicon`, a subclass of `Dseqrecord`.\n", + "# When doing a figure, it shows where primers anneal.\n", + "pcr_product.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(-80)\n", + "ccccGGATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mGGATCCaaaa\n", + "ggggCCTAGGTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAGGtttt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# If we 
want to see the sequence more clearly, we can turn it into a `Dseqrecord`\n", + "pcr_product = Dseqrecord(pcr_product)\n", + "pcr_product.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord(-9)\n", + "\u001b[48;5;11m\u001b[0mccccG \n", + "ggggCCTAG\n", + "\n", + "Dseqrecord(-70)\n", + "GATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mG \n", + " GTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAG\n", + "\n", + "Dseqrecord(-9)\n", + "\u001b[48;5;11m\u001b[0mGATCCaaaa\n", + " Gtttt\n" + ] + } + ], + "source": [ + "from Bio.Restriction import BamHI # cuts GGATCC\n", + "# a, payload, c are the cut fragments\n", + "a, payload, c = pcr_product.cut (BamHI)\n", + "print(a.figure())\n", + "print()\n", + "print (payload.figure())\n", + "print()\n", + "print(c.figure())\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(o50)\n", + "\u001b[48;5;11m\u001b[0maatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\n", + "ttacaaaaagggaGGGCCCgttttaTCTAGAacgatacgtagtagctaga" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We create a circular vector to insert the amplicon into\n", + "vector = Dseqrecord(\"aatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\", circular=True, name=\"vect\")\n", + "vector.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(o116)\n", + "aatgtttttccctCCCGGGcaaaatAGATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mGGATCTtgctatgcatcatcgatct\n", + "ttacaaaaagggaGGGCCCgttttaTCTAGGTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAGAacgatacgtagtagctaga" + ] + }, + 
"execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from Bio.Restriction import BglII # cuts AGATCT\n", + "linear_vector_bgl = vector.cut(BglII)[0] # Linearize the vector at BglII (produces only one fragment)\n", + "\n", + "# Ligate the fragment of interest to the vector, and call looped() to circularize it\n", + "# synced is used to place the origin coordinate (0) in the same place for rec_vector and vector\n", + "rec_vector= (linear_vector_bgl + payload).looped().synced(vector)\n", + "rec_vector.figure()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " -|fragment_A|13\n", + "| \\/\n", + "| /\\\n", + "| 13|fragment_B|13\n", + "| \\/\n", + "| /\\\n", + "| 13|fragment_C|13\n", + "| \\/\n", + "| /\\\n", + "| 13-\n", + "| |\n", + " --------------------------------------------" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's simulate a Gibson assembly\n", + "from pydna.assembly import Assembly\n", + "\n", + "fragments = [\n", + " Dseqrecord('aatgtttttccctCACTACGtgctatgcatcat', name=\"fragment_A\"),\n", + " Dseqrecord('tgctatgcatcatCTATGGAcactctaataatg', name=\"fragment_B\"),\n", + " Dseqrecord('cactctaataatgTTACATAaatgtttttccct', name=\"fragment_C\"),\n", + "]\n", + "\n", + "# limit is the min. 
homology length between fragments in the assembly\n", + "asm = Assembly(fragments, limit=10)\n", + "\n", + "# From the assembly object, which can generate all possible products, get a circular\n", + "product, *rest = asm.assemble_circular()\n", + "\n", + "# We can print a figure that shows the overlaps between fragments\n", + "product.figure()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(o60)\n", + "\u001b[48;5;11m\u001b[0maatgtttttccctCACTACGtgctatgcatcatCTATGGAcactctaataatgTTACATA\n", + "ttacaaaaagggaGTGATGCacgatacgtagtaGATACCTgtgagattattacAATGTAT" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Or show the final sequence:\n", + "Dseqrecord(product).figure()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } }, - { - "data": { - "text/plain": [ - "5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n", - " |||||||||||||||||||||||||\n", - " 3ACTAAGATAACGACTTTTTCTATTA5\n", - "5ATGCAAACAGTAATGATGGA3\n", - " ||||||||||||||||||||\n", - "3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Now let's design primers to amplify it\n", - "from pydna.design import primer_design\n", - "# limit is the minimum length of the primer, target_tm is the desired melting temperature of the primer\n", - "amplicon = primer_design(dsr, limit=13, target_tm=55)\n", - "# Let's print the primers, and a figure that shows where they align with the template sequence\n", - "print(\"forward 
primer:\", amplicon.forward_primer.seq)\n", - "print(\"reverse primer:\", amplicon.reverse_primer.seq)\n", - "amplicon.figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " 5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n", - " |||||||||||||||||||||||||\n", - " 3ACTAAGATAACGACTTTTTCTATTACCTAGGtttt5\n", - "5ccccGGATCCATGCAAACAGTAATGATGGA3\n", - " ||||||||||||||||||||\n", - " 3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's say we don't want to just amplify it, but we want to add restriction sites to it!\n", - "\n", - "from pydna.amplify import pcr\n", - "# We add the restriction sites to the primers\n", - "forward_primer = \"ccccGGATCC\" + amplicon.forward_primer\n", - "reverse_primer = \"ttttGGATCC\" + amplicon.reverse_primer\n", - "\n", - "# We do the PCR\n", - "pcr_product = pcr(forward_primer, reverse_primer, dsr)\n", - "# The PCR product is of class `Amplicon`, a subclass of `Dseqrecord`.\n", - "# When doing a figure, it shows where primers anneal.\n", - "pcr_product.figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(-80)\n", - "ccccGGATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mGGATCCaaaa\n", - "ggggCCTAGGTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAGGtttt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# If we want to see the sequence more clearly, we can turn it into a `Dseqrecord`\n", - "pcr_product = Dseqrecord(pcr_product)\n", - "pcr_product.figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"Dseqrecord(-9)\n", - "\u001b[48;5;11m\u001b[0mccccG \n", - "ggggCCTAG\n", - "\n", - "Dseqrecord(-70)\n", - "GATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mG \n", - " GTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAG\n", - "\n", - "Dseqrecord(-9)\n", - "\u001b[48;5;11m\u001b[0mGATCCaaaa\n", - " Gtttt\n" - ] - } - ], - "source": [ - "from Bio.Restriction import BamHI # cuts GGATCC\n", - "# a, payload, c are the cut fragments\n", - "a, payload, c = pcr_product.cut (BamHI)\n", - "print(a.figure())\n", - "print()\n", - "print (payload.figure())\n", - "print()\n", - "print(c.figure())\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(o50)\n", - "\u001b[48;5;11m\u001b[0maatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\n", - "ttacaaaaagggaGGGCCCgttttaTCTAGAacgatacgtagtagctaga" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# We create a circular vector to insert the amplicon into\n", - "vector = Dseqrecord(\"aatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\", circular=True, name=\"vect\")\n", - "vector.figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(o116)\n", - "aatgtttttccctCCCGGGcaaaatAGATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mGGATCTtgctatgcatcatcgatct\n", - "ttacaaaaagggaGGGCCCgttttaTCTAGGTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAGAacgatacgtagtagctaga" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from Bio.Restriction import BglII # cuts AGATCT\n", - "linear_vector_bgl = vector.cut(BglII)[0] # Linearize the vector at BglII (produces only one fragment)\n", - "\n", - "# Ligate the fragment of interest 
to the vector, and call looped() to circularize it\n", - "# synced is used to place the origin coordinate (0) in the same place for rec_vector and vector\n", - "rec_vector= (linear_vector_bgl + payload).looped().synced(vector)\n", - "rec_vector.figure()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " -|fragment_A|13\n", - "| \\/\n", - "| /\\\n", - "| 13|fragment_B|13\n", - "| \\/\n", - "| /\\\n", - "| 13|fragment_C|13\n", - "| \\/\n", - "| /\\\n", - "| 13-\n", - "| |\n", - " --------------------------------------------" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's simulate a Gibson assembly\n", - "from pydna.assembly import Assembly\n", - "\n", - "fragments = [\n", - " Dseqrecord('aatgtttttccctCACTACGtgctatgcatcat', name=\"fragment_A\"),\n", - " Dseqrecord('tgctatgcatcatCTATGGAcactctaataatg', name=\"fragment_B\"),\n", - " Dseqrecord('cactctaataatgTTACATAaatgtttttccct', name=\"fragment_C\"),\n", - "]\n", - "\n", - "# limit is the min. 
homology length between fragments in the assembly\n", - "asm = Assembly(fragments, limit=10)\n", - "\n", - "# From the assembly object, which can generate all possible products, get a circular\n", - "product, *rest = asm.assemble_circular()\n", - "\n", - "# We can print a figure that shows the overlaps between fragments\n", - "product.figure()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(o60)\n", - "\u001b[48;5;11m\u001b[0maatgtttttccctCACTACGtgctatgcatcatCTATGGAcactctaataatgTTACATA\n", - "ttacaaaaagggaGTGATGCacgatacgtagtaGATACCTgtgagattattacAATGTAT" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Or show the final sequence:\n", - "Dseqrecord(product).figure()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e474b013..fa4c5b63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,8 @@ [project] -authors = ["Björn F. Johansson"] +authors = [ + {name = "Björn F. 
Johansson", email = "bjornjobb@gmail.com"}, + {name = "Manuel Lera-Ramirez", email = "manulera14@gmail.com"} +] classifiers = [ "Development Status :: 4 - Beta", "Environment :: Console", @@ -21,16 +24,15 @@ dynamic = ["version"] name = "pydna" readme = "README.md" [tool.poetry.urls] -Changelog = "https://github.com/BjornFJohansson/pydna/blob/master/docs/CHANGELOG.md#changelog" +Changelog = "https://github.com/pydna-group/pydna/blob/master/docs/CHANGELOG.md#changelog" [tool.poetry] -authors = ["BjornFJohansson "] description = "Representing double stranded DNA and functions for simulating cloning and homologous recombination between DNA molecules." -documentation = "https://pydna.readthedocs.io/?badge=latest" -homepage = "https://github.com/BjornFJohansson/pydna#-pydna" +documentation = "https://pydna-group.github.io/pydna" +homepage = "https://github.com/pydna-group/pydna#-pydna" license = "BSD" name = "pydna" readme = "README.md" -repository = "https://github.com/BjornFJohansson/pydna/tree/dev_bjorn" +repository = "https://github.com/pydna-group/pydna/tree/master" version = "6.0.0-a.24.post.17+b7b559bd66" [tool.poetry.dependencies] appdirs = ">=1.4.4" diff --git a/scripts/conda-build/meta (copy).yaml b/scripts/conda-build/meta (copy).yaml index bba4f2eb..59817340 100755 --- a/scripts/conda-build/meta (copy).yaml +++ b/scripts/conda-build/meta (copy).yaml @@ -46,14 +46,14 @@ test: - src - src/pydna about: - home: https://github.com/BjornFJohansson/pydna + home: https://github.com/pydna-group/pydna license: BSD-3-Clause license_family: BSD license_file: LICENSE.txt summary: Representing double stranded DNA and simulating cloning, homologous recombination, Gibson assembly, Gel electrophoresis etc. description: Representing double stranded DNA and simulating cloning, homologous recombination, Gibson assembly, Gel electrophoresis etc. 
- doc_url: https://pydna.readthedocs.io - dev_url: https://github.com/BjornFJohansson/pydna + doc_url: https://pydna-group.github.io/pydna + dev_url: https://github.com/pydna-group/pydna extra: maintainers: - "Björn Johansson " diff --git a/scripts/conda-build/meta (copy2).yaml b/scripts/conda-build/meta (copy2).yaml index 0cd73f41..be76e2d2 100755 --- a/scripts/conda-build/meta (copy2).yaml +++ b/scripts/conda-build/meta (copy2).yaml @@ -48,14 +48,14 @@ test: - src - src/pydna about: - home: https://github.com/BjornFJohansson/pydna + home: https://github.com/pydna-group/pydna license: BSD-3-Clause license_family: BSD license_file: LICENSE.txt summary: Representing double stranded DNA and simulating cloning, homologous recombination, Gibson assembly, Gel electrophoresis etc. description: Representing double stranded DNA and simulating cloning, homologous recombination, Gibson assembly, Gel electrophoresis etc. - doc_url: https://pydna.readthedocs.io - dev_url: https://github.com/BjornFJohansson/pydna + doc_url: https://pydna-group.github.io/pydna + dev_url: https://github.com/pydna-group/pydna extra: maintainers: - "Björn Johansson " diff --git a/scripts/conda-build/meta2.yaml b/scripts/conda-build/meta2.yaml index bba4f2eb..59817340 100755 --- a/scripts/conda-build/meta2.yaml +++ b/scripts/conda-build/meta2.yaml @@ -46,14 +46,14 @@ test: - src - src/pydna about: - home: https://github.com/BjornFJohansson/pydna + home: https://github.com/pydna-group/pydna license: BSD-3-Clause license_family: BSD license_file: LICENSE.txt summary: Representing double stranded DNA and simulating cloning, homologous recombination, Gibson assembly, Gel electrophoresis etc. description: Representing double stranded DNA and simulating cloning, homologous recombination, Gibson assembly, Gel electrophoresis etc. 
- doc_url: https://pydna.readthedocs.io - dev_url: https://github.com/BjornFJohansson/pydna + doc_url: https://pydna-group.github.io/pydna + dev_url: https://github.com/pydna-group/pydna extra: maintainers: - "Björn Johansson " diff --git a/scripts/features/files_for_annotation_function/post.md b/scripts/features/files_for_annotation_function/post.md index ca2aaeaf..6b4b2dfe 100644 --- a/scripts/features/files_for_annotation_function/post.md +++ b/scripts/features/files_for_annotation_function/post.md @@ -2,7 +2,7 @@ genocad.com seems still down But I managed to download the supplementary files from the [paper](https://www.ncbi.nlm.nih.gov/pubmed/25925571). -I put them [here](https://github.com/BjornFJohansson/pydna/tree/py3dev/scripts/files_for_annotation_function) +I put them [here](https://github.com/pydna-group/pydna/tree/py3dev/scripts/files_for_annotation_function) A number of sbol files seems to hold most or all of the data: diff --git a/src/pydna/__init__.py b/src/pydna/__init__.py index 6c6a09f3..98706826 100644 --- a/src/pydna/__init__.py +++ b/src/pydna/__init__.py @@ -16,7 +16,7 @@ Pydna is a python package providing code for simulation of the creation of recombinant DNA molecules using `molecular biology `_ -techniques. Development of pydna happens in this Github `repository `_. +techniques. Development of pydna happens in this Github `repository `_. Provided: 1. PCR simulation @@ -87,7 +87,7 @@ The doctrings are also used to provide an automaticly generated reference manual available online at -`read the docs `_. +`read the docs `_. Docstrings can be explored using `IPython `_, an advanced Python shell with @@ -112,7 +112,7 @@ ----------------- The pydna source code is -`available on Github `_. +`available on Github `_. How to get more help -------------------- @@ -121,7 +121,7 @@ `Google group `_ for pydna, this is the preferred location for help. If you find bugs in pydna itself, open an issue at the -`Github repository `_. +`Github repository `_. 
Examples of pydna in use ------------------------ diff --git a/src/pydna/dseqrecord.py b/src/pydna/dseqrecord.py index cfeb2815..83d848fd 100644 --- a/src/pydna/dseqrecord.py +++ b/src/pydna/dseqrecord.py @@ -806,7 +806,7 @@ def __getitem__(self, sl): if not self.circular or sl_start < sl_stop: # TODO: special case for sl_end == 0 in circular sequences - # related to https://github.com/BjornFJohansson/pydna/issues/161 + # related to https://github.com/pydna-group/pydna/issues/161 if self.circular and sl.stop == 0: sl = slice(sl.start, len(self.seq), sl.step) answer.features = super().__getitem__(sl).features @@ -1283,7 +1283,7 @@ def cut(self, *enzymes): def apply_cut(self, left_cut, right_cut): dseq = self.seq.apply_cut(left_cut, right_cut) - # TODO: maybe remove depending on https://github.com/BjornFJohansson/pydna/issues/161 + # TODO: maybe remove depending on https://github.com/pydna-group/pydna/issues/161 if left_cut == right_cut: # Not really a cut, but to handle the general case @@ -1292,7 +1292,7 @@ def apply_cut(self, left_cut, right_cut): else: # The features that span the origin if shifting with left_cut, but that do not cross # the cut site should be included, and if there is a feature within the cut site, it should - # be duplicated. See https://github.com/BjornFJohansson/pydna/issues/180 for a practical example. + # be duplicated. See https://github.com/pydna-group/pydna/issues/180 for a practical example. # # Let's say we are going to open a circular plasmid like below (| inidicate cuts, numbers indicate # features) diff --git a/src/pydna/utils.py b/src/pydna/utils.py index b42d4696..e4ff465d 100644 --- a/src/pydna/utils.py +++ b/src/pydna/utils.py @@ -94,7 +94,7 @@ def shift_location(original_location, shift, lim): # in which consecutive parts do not have any bases between them. # This type of feature is generated to represent a feature that # spans the origin of a circular sequence. 
See more details in - # https://github.com/BjornFJohansson/pydna/issues/195 + # https://github.com/pydna-group/pydna/issues/195 if len(part) == 0: newparts.append(_sl(new_start, new_start, strand)) diff --git a/tests/test_module_amplify.py b/tests/test_module_amplify.py index 0fa15014..3344043c 100644 --- a/tests/test_module_amplify.py +++ b/tests/test_module_amplify.py @@ -784,7 +784,7 @@ def test_shifts(): def test_annotation(): """ Test that annotations are correctly added to the amplicon in primers with tails - https://github.com/BjornFJohansson/pydna/issues/279 + https://github.com/pydna-group/pydna/issues/279 """ from pydna.amplify import pcr from pydna.dseqrecord import Dseqrecord diff --git a/tests/test_module_dseq.py b/tests/test_module_dseq.py index aaf8e2a2..504ac13d 100644 --- a/tests/test_module_dseq.py +++ b/tests/test_module_dseq.py @@ -544,7 +544,7 @@ def test_dseq(): assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI) obj = Dseq("ggatccAGATCT", circular=True) - # TODO: address this test change Related to https://github.com/BjornFJohansson/pydna/issues/78 + # TODO: address this test change Related to https://github.com/pydna-group/pydna/issues/78 assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI) obj = Dseq("AGATCTggatcc", circular=True) @@ -578,7 +578,7 @@ def test_Dseq_slicing2(): from Bio.Restriction import BamHI, EcoRI, KpnI a = Dseq("aaGGATCCnnnnnnnnnGAATTCccc", circular=True) - # TODO: address this test change Related to https://github.com/BjornFJohansson/pydna/issues/78 + # TODO: address this test change Related to https://github.com/pydna-group/pydna/issues/78 assert a.cut( EcoRI, @@ -608,7 +608,7 @@ def test_Dseq___getitem__(): assert s[9:1] == Dseq("") assert t[9:1] == Dseq("") - # Indexing of full circular molecule (https://github.com/BjornFJohansson/pydna/issues/161) + # Indexing of full circular molecule (https://github.com/pydna-group/pydna/issues/161) s = Dseq("GGATCC", circular=True) str_seq = 
str(s) for shift in range(len(s)): @@ -742,7 +742,7 @@ def test_misc(): a, b = x.cut(NotI) z = (a + b).looped() - # TODO: address this test change Related to https://github.com/BjornFJohansson/pydna/issues/78 + # TODO: address this test change Related to https://github.com/pydna-group/pydna/issues/78 assert z.shifted(-6) == x diff --git a/tests/test_module_dseqrecord.py b/tests/test_module_dseqrecord.py index 6cf61f11..07d44e2a 100644 --- a/tests/test_module_dseqrecord.py +++ b/tests/test_module_dseqrecord.py @@ -1893,7 +1893,7 @@ def test___getitem__(): assert len(seqRecord[2:20].features) == 1 assert len(seqRecord[13:8].features) == 1 - # Indexing of full circular molecule (https://github.com/BjornFJohansson/pydna/issues/161) + # Indexing of full circular molecule (https://github.com/pydna-group/pydna/issues/161) s = Dseqrecord("GGATCC", circular=True) str_seq = str(s.seq) for shift in range(len(s)): diff --git a/tests/test_module_utils.py b/tests/test_module_utils.py index 6a25d74f..55f10dcf 100644 --- a/tests/test_module_utils.py +++ b/tests/test_module_utils.py @@ -468,7 +468,7 @@ def test_shift_location(): # TODO: more tests here - # Shifting of locations should be reversible (https://github.com/BjornFJohansson/pydna/issues/195) + # Shifting of locations should be reversible (https://github.com/pydna-group/pydna/issues/195) for strand in (1, -1, None): loc = SimpleLocation(0, 2, strand) assert shift_location(shift_location(loc, 1, 6), -1, 6) == loc @@ -483,7 +483,7 @@ def test_shift_location(): assert shift_location(loc, -1, 6) == SimpleLocation(5, 6, strand) + SimpleLocation(0, 3, strand) # Shifting ignoring the sequence length - # See https://github.com/BjornFJohansson/pydna/issues/281 + # See https://github.com/pydna-group/pydna/issues/281 for strand in (1, -1, None): loc = SimpleLocation(4, 6, strand) assert shift_location(loc, 1000, None) == SimpleLocation(1004, 1006, strand) From 83b7ead78485b377d77d8df4af3569ef60e5ef4c Mon Sep 17 00:00:00 2001 From: 
Manuel Lera-Ramirez Date: Wed, 15 Jan 2025 16:17:43 +0000 Subject: [PATCH 3/8] use poetry 1.8 for python 3.8 tests --- .github/workflows/pydna_test_and_coverage_workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pydna_test_and_coverage_workflow.yml b/.github/workflows/pydna_test_and_coverage_workflow.yml index 09e2cd82..3306b839 100644 --- a/.github/workflows/pydna_test_and_coverage_workflow.yml +++ b/.github/workflows/pydna_test_and_coverage_workflow.yml @@ -51,7 +51,7 @@ jobs: uses: snok/install-poetry@v1 with: virtualenvs.prefer-active-python: true - + version: ${{ matrix.python-version == '3.8' && '1.8.2' || 'latest' }} - name: 🔩 list Poetry settings run: poetry config --list From 58792c5fd13766aaf34c44228a925b460dc6dc27 Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Wed, 15 Jan 2025 16:21:15 +0000 Subject: [PATCH 4/8] update pyproject to work with poetry 1.8 --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index fa4c5b63..cc3a7e99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,10 @@ readme = "README.md" Changelog = "https://github.com/pydna-group/pydna/blob/master/docs/CHANGELOG.md#changelog" [tool.poetry] description = "Representing double stranded DNA and functions for simulating cloning and homologous recombination between DNA molecules." +authors = [ + {name = "Björn F. 
Johansson", email = "bjornjobb@gmail.com"}, + {name = "Manuel Lera-Ramirez", email = "manulera14@gmail.com"} +] documentation = "https://pydna-group.github.io/pydna" homepage = "https://github.com/pydna-group/pydna#-pydna" license = "BSD" From 87908963290ec62f646b8897a62eb4d59e17722f Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Wed, 15 Jan 2025 16:28:22 +0000 Subject: [PATCH 5/8] fix installation line in google collab --- docs/notebooks/CRISPR.ipynb | 9 +++------ docs/notebooks/Dseq.ipynb | 9 +++------ docs/notebooks/Dseq_Features.ipynb | 9 +++------ docs/notebooks/Example_CRISPR.ipynb | 9 +++------ docs/notebooks/Example_Gibson.ipynb | 9 +++------ docs/notebooks/Example_Restriction.ipynb | 9 +++------ docs/notebooks/Gibson.ipynb | 9 +++------ docs/notebooks/Importing_Seqs.ipynb | 9 +++------ docs/notebooks/PCR.ipynb | 9 +++------ docs/notebooks/Restrict_Ligate_Cloning.ipynb | 9 +++------ docs/notebooks/primer_design.ipynb | 13 +++++++++++++ docs/notebooks/readme_example.ipynb | 9 +++------ 12 files changed, 46 insertions(+), 66 deletions(-) diff --git a/docs/notebooks/CRISPR.ipynb b/docs/notebooks/CRISPR.ipynb index 7c223dbd..41aca0c3 100644 --- a/docs/notebooks/CRISPR.ipynb +++ b/docs/notebooks/CRISPR.ipynb @@ -28,14 +28,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" + " %pip install pydna\n" ] }, { @@ -131,4 +128,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/notebooks/Dseq.ipynb b/docs/notebooks/Dseq.ipynb index 1073ef34..c8dfab06 100644 --- a/docs/notebooks/Dseq.ipynb +++ b/docs/notebooks/Dseq.ipynb @@ -23,14 
+23,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" + " %pip install pydna\n" ] }, { @@ -794,4 +791,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/notebooks/Dseq_Features.ipynb b/docs/notebooks/Dseq_Features.ipynb index 68d36576..9e0e7fb2 100644 --- a/docs/notebooks/Dseq_Features.ipynb +++ b/docs/notebooks/Dseq_Features.ipynb @@ -30,14 +30,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" + " %pip install pydna" ] }, { @@ -818,4 +815,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/notebooks/Example_CRISPR.ipynb b/docs/notebooks/Example_CRISPR.ipynb index 6251d842..6a3be063 100644 --- a/docs/notebooks/Example_CRISPR.ipynb +++ b/docs/notebooks/Example_CRISPR.ipynb @@ -75,14 +75,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install 
pydna\n" + " %pip install pydna\n" ] }, { @@ -270,4 +267,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/notebooks/Example_Gibson.ipynb b/docs/notebooks/Example_Gibson.ipynb index f99f4f31..03ca07c1 100755 --- a/docs/notebooks/Example_Gibson.ipynb +++ b/docs/notebooks/Example_Gibson.ipynb @@ -27,14 +27,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" + " %pip install pydna\n" ] }, { @@ -267,4 +264,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/notebooks/Example_Restriction.ipynb b/docs/notebooks/Example_Restriction.ipynb index 91406719..4c83bcfd 100755 --- a/docs/notebooks/Example_Restriction.ipynb +++ b/docs/notebooks/Example_Restriction.ipynb @@ -31,14 +31,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" + " %pip install pydna\n" ] }, { @@ -1014,4 +1011,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/notebooks/Gibson.ipynb b/docs/notebooks/Gibson.ipynb index 0a739436..8b9cdf2b 100644 --- a/docs/notebooks/Gibson.ipynb +++ b/docs/notebooks/Gibson.ipynb @@ -32,14 +32,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only 
when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" + " %pip install pydna\n" ] }, { @@ -164,4 +161,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/notebooks/Importing_Seqs.ipynb b/docs/notebooks/Importing_Seqs.ipynb index 16f079fb..15b366e7 100755 --- a/docs/notebooks/Importing_Seqs.ipynb +++ b/docs/notebooks/Importing_Seqs.ipynb @@ -34,14 +34,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" + " %pip install pydna\n" ] }, { @@ -405,4 +402,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/notebooks/PCR.ipynb b/docs/notebooks/PCR.ipynb index 81c48079..4cc29260 100755 --- a/docs/notebooks/PCR.ipynb +++ b/docs/notebooks/PCR.ipynb @@ -32,14 +32,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" + " %pip install pydna\n" ] }, { @@ -395,4 +392,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file 
+} diff --git a/docs/notebooks/Restrict_Ligate_Cloning.ipynb b/docs/notebooks/Restrict_Ligate_Cloning.ipynb index 55fc35f7..3b1ed426 100644 --- a/docs/notebooks/Restrict_Ligate_Cloning.ipynb +++ b/docs/notebooks/Restrict_Ligate_Cloning.ipynb @@ -29,14 +29,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip install pydna\n" + " %pip install pydna\n" ] }, { @@ -319,4 +316,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/notebooks/primer_design.ipynb b/docs/notebooks/primer_design.ipynb index 5991f67d..d116803d 100644 --- a/docs/notebooks/primer_design.ipynb +++ b/docs/notebooks/primer_design.ipynb @@ -18,6 +18,19 @@ "" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/docs/notebooks/readme_example.ipynb b/docs/notebooks/readme_example.ipynb index 68992ece..485c4891 100644 --- a/docs/notebooks/readme_example.ipynb +++ b/docs/notebooks/readme_example.ipynb @@ -19,14 +19,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# Install pydna (only when running on Colab)\n", "import sys\n", "if 'google.colab' in sys.modules:\n", - " %%capture\n", - " # Install the current development version of pydna (comment to install pip version)\n", - " !pip install git+https://github.com/pydna-group/pydna@dev_bjorn\n", - " # Install pip version instead (uncomment to install)\n", - " # !pip 
install pydna" + " %pip install pydna\n" ] }, { @@ -366,4 +363,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} From 44f1dadcd494226bbb049b5024a2687da2c5feb5 Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Wed, 15 Jan 2025 16:32:43 +0000 Subject: [PATCH 6/8] fix pyproject.toml and README instructions for poetry --- README.md | 5 ++++- pyproject.toml | 5 +---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 951a5c6c..506b92dc 100755 --- a/README.md +++ b/README.md @@ -435,7 +435,10 @@ poetry config virtualenvs.in-project true # Install dependencies (extras are required for tests to pass) poetry install --all-extras -# Activate virtual environment +# Activate virtual environment (poetry version 2) +poetry env activate + +# Activate virtual environment (poetry version 1) poetry shell # Install pre-commit hooks diff --git a/pyproject.toml b/pyproject.toml index cc3a7e99..35dcb88b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,10 +27,7 @@ readme = "README.md" Changelog = "https://github.com/pydna-group/pydna/blob/master/docs/CHANGELOG.md#changelog" [tool.poetry] description = "Representing double stranded DNA and functions for simulating cloning and homologous recombination between DNA molecules." -authors = [ - {name = "Björn F. Johansson", email = "bjornjobb@gmail.com"}, - {name = "Manuel Lera-Ramirez", email = "manulera14@gmail.com"} -] +authors = ["Björn F. 
Johansson", "Manuel Lera-Ramirez"] documentation = "https://pydna-group.github.io/pydna" homepage = "https://github.com/pydna-group/pydna#-pydna" license = "BSD" From f62025c04bba7647b5289558e5a719e39cbf212e Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Wed, 15 Jan 2025 16:34:14 +0000 Subject: [PATCH 7/8] clean notebooks --- docs/notebooks/CRISPR.ipynb | 258 +-- docs/notebooks/Dseq.ipynb | 1582 +++++++------- docs/notebooks/Dseq_Features.ipynb | 1632 +++++++------- docs/notebooks/Example_CRISPR.ipynb | 536 ++--- docs/notebooks/Example_Gibson.ipynb | 530 ++--- docs/notebooks/Example_Restriction.ipynb | 2024 +++++++++--------- docs/notebooks/Gibson.ipynb | 324 +-- docs/notebooks/Importing_Seqs.ipynb | 806 +++---- docs/notebooks/PCR.ipynb | 786 +++---- docs/notebooks/Restrict_Ligate_Cloning.ipynb | 634 +++--- docs/notebooks/readme_example.ipynb | 726 +++---- 11 files changed, 4919 insertions(+), 4919 deletions(-) diff --git a/docs/notebooks/CRISPR.ipynb b/docs/notebooks/CRISPR.ipynb index 41aca0c3..c0af0c41 100644 --- a/docs/notebooks/CRISPR.ipynb +++ b/docs/notebooks/CRISPR.ipynb @@ -1,131 +1,131 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# How to Model CRISPR-Cas9 Experiments in pydna\n", - "\n", - "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", - "\n", - "The pydna package can simulate CRISPR-Cas9 editing, which allows one to cut DNA sequences at specific sites using guide RNAs (gRNAs) that direct the Cas9 protein. This page will guide you through the process of using the `pydna.crispr` module to model a CRISPR-Cas9 cut on a DNA sequence.\n", - "\n", - "The `pydna.crispr` module contains the `cas9` class to simulate the biological activites of the Cas9 protein and the guideRNA, which should be imported. In addtion, the `Dseqrecord` class has also been imported to generate an example target_sequence." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pydna.crispr import cas9, protospacer\n", - "from pydna.dseqrecord import Dseqrecord" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The target sequence and guideRNA (gRNA) sequence needs to be generated. Note the the sequence can be passed as a `Dseqrecord` object." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cutting with enzyme 1: (Dseqrecord(-17), Dseqrecord(-6))\n", - "protospacer: GTTACTTTACCCGACGTCCC\n", - "cutting with enzyme 2: (Dseqrecord(-17), Dseqrecord(-6))\n", - "cutting with no PAM in target: ()\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.crispr import cas9, protospacer\n", - "\n", - "# <----protospacer---><-------scaffold----------------->\n", - "guide = \"GTTACTTTACCCGACGTCCCgttttagagctagaaatagcaagttaaaataagg\"\n", - "target = \"GTTACTTTACCCGACGTCCCaGG\"\n", - "# <->\n", - "# PAM\n", - "\n", - "# Create an enzyme object with the protospacer\n", - "enzyme = cas9(\"GTTACTTTACCCGACGTCCC\")\n", - "\n", - "target_dseq = Dseqrecord(target)\n", - "\n", - "# Cut using the enzyme\n", - "print('cutting with enzyme 1:', target_dseq.cut(enzyme))\n", - "\n", - "\n", - "# Get the protospacer from the full gRNA sequence\n", - "gRNA_protospacers = protospacer(Dseqrecord(guide), cas=cas9)\n", - "# Print the protospacer (it's a list because often plasmids contain multiple gRNAs)\n", - 
"print('protospacer:', gRNA_protospacers[0])\n", - "gRNA_protospacer = gRNA_protospacers[0]\n", - "\n", - "# Create an enzyme from the protospacer\n", - "enzyme2 = cas9(gRNA_protospacer)\n", - "\n", - "# Simulate the cut\n", - "print('cutting with enzyme 2:', target_dseq.cut(enzyme2))\n", - "\n", - "\n", - "# Note that without the PAM, the cut will not be made.\n", - "\n", - "target_noPAM_dseq = Dseqrecord(\"GTTACTTTACCCGACGTCCCaaa\")\n", - "print(\"cutting with no PAM in target:\", target_noPAM_dseq.cut(enzyme2))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to Model CRISPR-Cas9 Experiments in pydna\n", + "\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "The pydna package can simulate CRISPR-Cas9 editing, which allows one to cut DNA sequences at specific sites using guide RNAs (gRNAs) that direct the Cas9 protein. This page will guide you through the process of using the `pydna.crispr` module to model a CRISPR-Cas9 cut on a DNA sequence.\n", + "\n", + "The `pydna.crispr` module contains the `cas9` class to simulate the biological activites of the Cas9 protein and the guideRNA, which should be imported. In addtion, the `Dseqrecord` class has also been imported to generate an example target_sequence." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydna.crispr import cas9, protospacer\n", + "from pydna.dseqrecord import Dseqrecord" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The target sequence and guideRNA (gRNA) sequence needs to be generated. Note the the sequence can be passed as a `Dseqrecord` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cutting with enzyme 1: (Dseqrecord(-17), Dseqrecord(-6))\n", + "protospacer: GTTACTTTACCCGACGTCCC\n", + "cutting with enzyme 2: (Dseqrecord(-17), Dseqrecord(-6))\n", + "cutting with no PAM in target: ()\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.crispr import cas9, protospacer\n", + "\n", + "# <----protospacer---><-------scaffold----------------->\n", + "guide = \"GTTACTTTACCCGACGTCCCgttttagagctagaaatagcaagttaaaataagg\"\n", + "target = \"GTTACTTTACCCGACGTCCCaGG\"\n", + "# <->\n", + "# PAM\n", + "\n", + "# Create an enzyme object with the protospacer\n", + "enzyme = cas9(\"GTTACTTTACCCGACGTCCC\")\n", + "\n", + "target_dseq = Dseqrecord(target)\n", + "\n", + "# Cut using the enzyme\n", + "print('cutting with enzyme 1:', target_dseq.cut(enzyme))\n", + "\n", + "\n", + "# Get the protospacer from the full gRNA sequence\n", + "gRNA_protospacers = protospacer(Dseqrecord(guide), cas=cas9)\n", + "# Print the protospacer (it's a list because often plasmids contain multiple gRNAs)\n", + 
"print('protospacer:', gRNA_protospacers[0])\n", + "gRNA_protospacer = gRNA_protospacers[0]\n", + "\n", + "# Create an enzyme from the protospacer\n", + "enzyme2 = cas9(gRNA_protospacer)\n", + "\n", + "# Simulate the cut\n", + "print('cutting with enzyme 2:', target_dseq.cut(enzyme2))\n", + "\n", + "\n", + "# Note that without the PAM, the cut will not be made.\n", + "\n", + "target_noPAM_dseq = Dseqrecord(\"GTTACTTTACCCGACGTCCCaaa\")\n", + "print(\"cutting with no PAM in target:\", target_noPAM_dseq.cut(enzyme2))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/Dseq.ipynb b/docs/notebooks/Dseq.ipynb index c8dfab06..8e884ebd 100644 --- a/docs/notebooks/Dseq.ipynb +++ b/docs/notebooks/Dseq.ipynb @@ -1,794 +1,794 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Representing sequences in pydna\n", - "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Pydna contains classes to represent double stranded DNA sequences that can:\n", - "\n", - "* Be linear\n", - "* Be circular\n", - "* Contain overhangs (sticky ends).\n", - "\n", - "These sequences can be used to simulate 
molecular biology methods such as cloning and PCR. The main classes used to represent sequences are `Dseq` and `Dseqrecord`.\n", - "* `Dseq` represents the sequence only. Think of it as a FASTA file.\n", - "* `Dseqrecord` can contain sequence features and other info such as publication, authors, etc. Think of it as a Genbank file.\n", - "\n", - "> NOTE: The `Dseq` class is a subclass of biopython's `Seq`, whose documentation can be found [here](https://biopython.org/wiki/Seq). `Dseqrecord` is a subclass of biopython's `SeqRecord`, whose documentation can be found [here](https://biopython.org/wiki/SeqRecord)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Dseq Class\n", - "\n", - "We can create a `Dseq` object in different ways.\n", - "\n", - "For a linear sequence without overhangs, we create a `Dseq` object passing a string with the sequence. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "aatat\n", - "ttata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.dseq import Dseq\n", - "my_seq = Dseq(\"aatat\")\n", - "my_seq" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the console representation above, there are three lines:\n", - "1. `Dseq(-5)` indicates that the sequence is linear and has 5 basepairs.\n", - "2. `aatat`, the top / sense / watson strand, referred from now on as **watson** strand..\n", - "3. 
`ttata`, the bottom / anti-sense / crick strand, referred from now on as **crick** strand.\n", - "\n", - "Now, let's create a circular sequence:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(o5)\n", - "aatat\n", - "ttata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq(\"aatat\", circular=True)\n", - "my_seq" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> Note how `o5` indicates that the sequence is circular and has 5 basepairs.\n", - "\n", - "One way to represent a linear sequence with overhangs is to instantiate `Dseq` with the following arguments:\n", - "* The `watson` strand as a string in the 5'-3' direction.\n", - "* The `crick` strand as a string in the 5'-3' direction.\n", - "* The 5' overhang `ovhg` (overhang), which can be positive or negative, and represents the number of basepairs that the `watson` strand extends beyond the `crick` strand." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "actag\n", - " gatc" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Dseq(\"actag\", \"ctag\", -1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> Note how the bottom strand is passed in the 5'-3' direction, but it is represented in the 3'-5' direction in the console output.\n", - "\n", - "If you omit the `ovhg` argument, pydna will try to find the value that makes the `watson` and `crick` strands complementary." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "actag\n", - " gatc" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Dseq(\"actag\", \"ctag\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The best way to get a feeling for the meaning of `ovhg` is to visualise the possible scenarios as such:\n", - "\n", - "```\n", - "dsDNA overhang\n", - "\n", - " nnn... 2\n", - "nnnnn...\n", - "\n", - " nnnn... 1\n", - "nnnnn...\n", - "\n", - "nnnnn... 0\n", - "nnnnn...\n", - "\n", - "nnnnn... -1\n", - " nnnn...\n", - "\n", - "nnnnn... -2\n", - " nnn...\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Of note, the DNA sequence can be passed in both lower case and upper case, and are not restricted to the conventional ATCG nucleotides (E.g ), The class supports the IUPAC ambiguous nucleotide code." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "Actag\n", - " gatC" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Dseq(\"Actag\", \"Ctag\", -1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Another way to pass the overhangs is to use the `from_full_sequence_and_overhangs` classmethod, which only needs the `watson`/sense strand. This is useful you can only store the entire sequence (e.g. in a FASTA file), or if you want to specify overhangs on both sides of the double stranded DNA when you create the object.\n", - "\n", - "Both the `watson_ovhg` and `crick_ovhg` can be passed following the same rules as above. Specifically, the `crick_ovhg` argument is identical to the conventional `ovhg` argument. 
The `watson_ovhg` argument is the `ovhg` argument applied to the reverse complementary sequence." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-8)\n", - "aaatta\n", - " aattt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg=-3, watson_ovhg=-2)\n", - "my_seq" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A list of possible scenarios, applying positive and negative `crick_ovhg` and `watson_ovhg` to a `Dseq` object are visualised in the output of the code below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "watson_ovhg is -3, crick_ovhg is -2\n", - "Dseq(-8)\n", - "aaatt\n", - " taattt\n", - "\n", - "watson_ovhg is 3, crick_ovhg is -2\n", - "Dseq(-8)\n", - "aaattaaa\n", - " taa\n", - "\n", - "watson_ovhg is -3, crick_ovhg is 2\n", - "Dseq(-8)\n", - " att\n", - "tttaattt\n", - "\n", - "watson_ovhg is 3, crick_ovhg is 2\n", - "Dseq(-8)\n", - " attaaa\n", - "tttaa\n", - "\n" - ] - } - ], - "source": [ - "for crick_ovhg in [-2, 2]:\n", - " for watson_ovhg in [-3, 3]:\n", - " print(\"watson_ovhg is \" + str(watson_ovhg) + \", crick_ovhg is \" + str(crick_ovhg))\n", - " my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg, watson_ovhg)\n", - " print(my_seq.__repr__() + \"\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The drawing below can help visualize the meaning of the overhangs.\n", - "```\n", - " (-3)--(-2)--(-1)--(x)--(x)--(x)--(-1)--(-2)\n", - "\n", - "5'( a)--( a)--( a)--(t)--(t)--(a)--( a)--( a)3'\n", - "3'( a)--( a)--( a)--(t)--(t)--(a)--( a)--( a)5'\n", - "\n", - "5'( a)--( a)--( a)--(t)--(t)--(a)--( )--( )3'\n", - "3'( )--( )--( 
)--(t)--(t)--(a)--( a)--( a)5'\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you would like to check the overhangs for a `Dseq` object, it can be done by calling the methods `five_prime_end` and `three_prime_end` to show the 5' and 3' overhangs, respectively. An example of a `Dseq` object, and examples showing what the print-out of the methods looks like are demonstrated here:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseq(-7)\n", - "aatat\n", - " atatt\n", - "(\"5'\", 'aa')\n", - "(\"5'\", 'tt')\n" - ] - } - ], - "source": [ - "my_seq = Dseq(\"aatat\", \"ttata\", ovhg=-2)\n", - "print(my_seq.__repr__())\n", - "print(my_seq.five_prime_end())\n", - "print(my_seq.three_prime_end())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you now want to join your sequence's sticky ends to make a circular sequence (i.e Plasmid), you can use the `looped` method. The sticky ends must be compatible to do so.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(o5)\n", - "aatat\n", - "ttata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq(\"aatat\", \"ttata\", ovhg=-2)\n", - "my_seq.looped()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you want to change the circular origin of the sequence/plasmid, this can be easily done using the `shifted` method. 
This can be done by providing the number of bases between the original origin with the new origin: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(o5)\n", - "tataa\n", - "atatt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq(\"aatat\", circular=True)\n", - "my_seq.shifted(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## __getitem__, __repr__, and __str__ methods\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Slicing sequences (`__getitem__`)\n", - "\n", - "`__getitem__` is the method that is called when you use the square brackets `[]` after a python object. Below is an example of the builtin python `list`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "using square brackets: [2, 3]\n", - "is the same as using __getitem__: [2, 3]\n" - ] - } - ], - "source": [ - "my_list = [1, 2, 3]\n", - "\n", - "print('using square brackets:', my_list[1:])\n", - "print('is the same as using __getitem__:', my_list.__getitem__(slice(1, None)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `__getitem__` method is modified in pydna to deal with `Dseq` objects and returns a slice of the `Dseq` object, defined by the a start value and a stop value, similarly to string indexing. In other words, `__getitem__` indexes `Dseq`. Note that '__getitem__' (and, consequently, `[]`) uses zero-based indexing." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-3)\n", - "tat\n", - "ata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq(\"aatataa\")\n", - "my_seq[2:5]\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`__getitem__` respects overhangs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-5)\n", - "tata\n", - "atatt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_seq = Dseq.from_full_sequence_and_overhangs(\"aatataa\", crick_ovhg=0, watson_ovhg=-1)\n", - "my_seq[2:]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that index zero corresponds to the leftmost base of the sequence, which might not necessarily be on the `watson` strand. Let's create a sequence that has an overhang on the left side." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-10)\n", - " acgttcc\n", - "ttatgcaagg" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sequence_with_overhangs = Dseq.from_full_sequence_and_overhangs(\"aatacgttcc\", crick_ovhg=3, watson_ovhg=0)\n", - "sequence_with_overhangs" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "When we index starting from `2`, we don't start counting on the watson, but on the crick strand since it is the leftmost one." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-8)\n", - " acgttcc\n", - "atgcaagg" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sequence_with_overhangs[2:]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Slicing circular sequences\n", - "When slicing circular `Dseq` objects we get linear sequences." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-4)\n", - "atct\n", - "taga" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "circular_seq = Dseq(\"aatctaa\", circular=True)\n", - "circular_seq[1:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can slice circular sequences across the origin (where index is zero) if the first index is bigger than the second index. This is demonstrated in the example below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-4)\n", - "aaaa\n", - "tttt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "circular_seq[5:2]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Printing sequences to the console: `__repr__` and `__str__`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`__repr__` and `__str__` are methods present in all python classes that return a string representation of an object. `__str__` is called by the `print` function, and `__repr__` is used by the console or notebook output when the object is not assigned to a variable. 
Below is an example with a `date` object:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> print statement: 2023-08-15\n", - "> repr: datetime.date(2023, 8, 15)\n", - "> repr from class method: datetime.date(2023, 8, 15)\n", - "\n", - "> console output:\n" - ] - }, - { - "data": { - "text/plain": [ - "datetime.date(2023, 8, 15)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import datetime\n", - "\n", - "my_date = datetime.date(2023, 8, 15)\n", - "\n", - "print('> print statement:', my_date)\n", - "print('> repr:', repr(my_date))\n", - "print('> repr from class method:', my_date.__repr__())\n", - "\n", - "print()\n", - "print('> console output:')\n", - "my_date" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In a similar way, `__repr__` and `__str__` methods are used by pydna to represent sequences as strings for different purposes:\n", - "\n", - "* `__repr__` is used to make a figure-like representation that shows both strands and the overhangs.\n", - "* `__str__` is used to return the entire sequence as a string of characters (from the left-most to the right-most base of both strands), the way we would store it in a FASTA file.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> figure-like representation:\n", - " Dseq(-8)\n", - "aaatta\n", - " aattt\n", - "\n", - "> string representation:\n", - " aaattaaa\n" - ] - } - ], - "source": [ - "my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg=-3, watson_ovhg=-2)\n", - "print('> figure-like representation:\\n', my_seq.__repr__())\n", - "print()\n", - "print('> string representation:\\n', my_seq)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note 
that on the string representation, the bases correspond to the entire sequence provided, even when they are only present on either the `watson` or `crick` strand. In the example above, the last two `aa` bases are missing from the `watson` strand, and that only the `crick` strand has them." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Edge cases\n", - "\n", - "You can create arbitrary double-stranded sequences that are not complementary if you specify both strands and an overhang, but you won't be able to use them for molecular biology simulations. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseq(-6)\n", - " xxxx\n", - "tata" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Dseq(\"xxxx\", \"atat\", ovhg=2)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Representing sequences in pydna\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pydna contains classes to represent double stranded DNA 
sequences that can:\n", + "\n", + "* Be linear\n", + "* Be circular\n", + "* Contain overhangs (sticky ends).\n", + "\n", + "These sequences can be used to simulate molecular biology methods such as cloning and PCR. The main classes used to represent sequences are `Dseq` and `Dseqrecord`.\n", + "* `Dseq` represents the sequence only. Think of it as a FASTA file.\n", + "* `Dseqrecord` can contain sequence features and other info such as publication, authors, etc. Think of it as a Genbank file.\n", + "\n", + "> NOTE: The `Dseq` class is a subclass of biopython's `Seq`, whose documentation can be found [here](https://biopython.org/wiki/Seq). `Dseqrecord` is a subclass of biopython's `SeqRecord`, whose documentation can be found [here](https://biopython.org/wiki/SeqRecord)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Dseq Class\n", + "\n", + "We can create a `Dseq` object in different ways.\n", + "\n", + "For a linear sequence without overhangs, we create a `Dseq` object passing a string with the sequence. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "aatat\n", + "ttata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.dseq import Dseq\n", + "my_seq = Dseq(\"aatat\")\n", + "my_seq" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the console representation above, there are three lines:\n", + "1. `Dseq(-5)` indicates that the sequence is linear and has 5 basepairs.\n", + "2. `aatat`, the top / sense / watson strand, referred from now on as **watson** strand.\n", + "3.
`ttata`, the bottom / anti-sense / crick strand, referred from now on as **crick** strand.\n", + "\n", + "Now, let's create a circular sequence:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(o5)\n", + "aatat\n", + "ttata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq(\"aatat\", circular=True)\n", + "my_seq" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Note how `o5` indicates that the sequence is circular and has 5 basepairs.\n", + "\n", + "One way to represent a linear sequence with overhangs is to instantiate `Dseq` with the following arguments:\n", + "* The `watson` strand as a string in the 5'-3' direction.\n", + "* The `crick` strand as a string in the 5'-3' direction.\n", + "* The 5' overhang `ovhg` (overhang), which can be positive or negative, and represents the number of bases that the `crick` strand extends beyond the `watson` strand at the left end (a negative value means that the `watson` strand extends beyond the `crick` strand)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "actag\n", + " gatc" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Dseq(\"actag\", \"ctag\", -1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Note how the bottom strand is passed in the 5'-3' direction, but it is represented in the 3'-5' direction in the console output.\n", + "\n", + "If you omit the `ovhg` argument, pydna will try to find the value that makes the `watson` and `crick` strands complementary."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "actag\n", + " gatc" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Dseq(\"actag\", \"ctag\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The best way to get a feeling for the meaning of `ovhg` is to visualise the possible scenarios as such:\n", + "\n", + "```\n", + "dsDNA overhang\n", + "\n", + " nnn... 2\n", + "nnnnn...\n", + "\n", + " nnnn... 1\n", + "nnnnn...\n", + "\n", + "nnnnn... 0\n", + "nnnnn...\n", + "\n", + "nnnnn... -1\n", + " nnnn...\n", + "\n", + "nnnnn... -2\n", + " nnn...\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of note, the DNA sequence can be passed in both lower case and upper case, and is not restricted to the conventional ATCG nucleotides: the class supports the IUPAC ambiguous nucleotide code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "Actag\n", + " gatC" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Dseq(\"Actag\", \"Ctag\", -1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another way to pass the overhangs is to use the `from_full_sequence_and_overhangs` classmethod, which only needs the `watson`/sense strand. This is useful if you can only store the entire sequence (e.g. in a FASTA file), or if you want to specify overhangs on both sides of the double stranded DNA when you create the object.\n", + "\n", + "Both the `watson_ovhg` and `crick_ovhg` can be passed following the same rules as above. Specifically, the `crick_ovhg` argument is identical to the conventional `ovhg` argument.
The `watson_ovhg` argument is the `ovhg` argument applied to the reverse complementary sequence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-8)\n", + "aaatta\n", + " aattt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg=-3, watson_ovhg=-2)\n", + "my_seq" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A list of possible scenarios, applying positive and negative `crick_ovhg` and `watson_ovhg` to a `Dseq` object are visualised in the output of the code below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "watson_ovhg is -3, crick_ovhg is -2\n", + "Dseq(-8)\n", + "aaatt\n", + " taattt\n", + "\n", + "watson_ovhg is 3, crick_ovhg is -2\n", + "Dseq(-8)\n", + "aaattaaa\n", + " taa\n", + "\n", + "watson_ovhg is -3, crick_ovhg is 2\n", + "Dseq(-8)\n", + " att\n", + "tttaattt\n", + "\n", + "watson_ovhg is 3, crick_ovhg is 2\n", + "Dseq(-8)\n", + " attaaa\n", + "tttaa\n", + "\n" + ] + } + ], + "source": [ + "for crick_ovhg in [-2, 2]:\n", + " for watson_ovhg in [-3, 3]:\n", + " print(\"watson_ovhg is \" + str(watson_ovhg) + \", crick_ovhg is \" + str(crick_ovhg))\n", + " my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg, watson_ovhg)\n", + " print(my_seq.__repr__() + \"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The drawing below can help visualize the meaning of the overhangs.\n", + "```\n", + " (-3)--(-2)--(-1)--(x)--(x)--(x)--(-1)--(-2)\n", + "\n", + "5'( a)--( a)--( a)--(t)--(t)--(a)--( a)--( a)3'\n", + "3'( a)--( a)--( a)--(t)--(t)--(a)--( a)--( a)5'\n", + "\n", + "5'( a)--( a)--( a)--(t)--(t)--(a)--( )--( )3'\n", + "3'( )--( )--( 
)--(t)--(t)--(a)--( a)--( a)5'\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you would like to check the overhangs for a `Dseq` object, it can be done by calling the methods `five_prime_end` and `three_prime_end` to show the 5' and 3' overhangs, respectively. An example of a `Dseq` object, and examples showing what the print-out of the methods looks like are demonstrated here:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseq(-7)\n", + "aatat\n", + " atatt\n", + "(\"5'\", 'aa')\n", + "(\"5'\", 'tt')\n" + ] + } + ], + "source": [ + "my_seq = Dseq(\"aatat\", \"ttata\", ovhg=-2)\n", + "print(my_seq.__repr__())\n", + "print(my_seq.five_prime_end())\n", + "print(my_seq.three_prime_end())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you now want to join your sequence's sticky ends to make a circular sequence (i.e Plasmid), you can use the `looped` method. The sticky ends must be compatible to do so.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(o5)\n", + "aatat\n", + "ttata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq(\"aatat\", \"ttata\", ovhg=-2)\n", + "my_seq.looped()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to change the circular origin of the sequence/plasmid, this can be easily done using the `shifted` method. 
This can be done by providing the number of bases between the original origin and the new origin: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(o5)\n", + "tataa\n", + "atatt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq(\"aatat\", circular=True)\n", + "my_seq.shifted(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `__getitem__`, `__repr__`, and `__str__` methods\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Slicing sequences (`__getitem__`)\n", + "\n", + "`__getitem__` is the method that is called when you use the square brackets `[]` after a python object. Below is an example of the builtin python `list`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "using square brackets: [2, 3]\n", + "is the same as using __getitem__: [2, 3]\n" + ] + } + ], + "source": [ + "my_list = [1, 2, 3]\n", + "\n", + "print('using square brackets:', my_list[1:])\n", + "print('is the same as using __getitem__:', my_list.__getitem__(slice(1, None)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `__getitem__` method is modified in pydna to deal with `Dseq` objects and returns a slice of the `Dseq` object, defined by a start value and a stop value, similarly to string indexing. In other words, `__getitem__` indexes `Dseq`. Note that `__getitem__` (and, consequently, `[]`) uses zero-based indexing."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-3)\n", + "tat\n", + "ata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq(\"aatataa\")\n", + "my_seq[2:5]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`__getitem__` respects overhangs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-5)\n", + "tata\n", + "atatt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_seq = Dseq.from_full_sequence_and_overhangs(\"aatataa\", crick_ovhg=0, watson_ovhg=-1)\n", + "my_seq[2:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that index zero corresponds to the leftmost base of the sequence, which might not necessarily be on the `watson` strand. Let's create a sequence that has an overhang on the left side." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-10)\n", + " acgttcc\n", + "ttatgcaagg" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sequence_with_overhangs = Dseq.from_full_sequence_and_overhangs(\"aatacgttcc\", crick_ovhg=3, watson_ovhg=0)\n", + "sequence_with_overhangs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When we index starting from `2`, we don't start counting on the watson, but on the crick strand since it is the leftmost one." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-8)\n", + " acgttcc\n", + "atgcaagg" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sequence_with_overhangs[2:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Slicing circular sequences\n", + "When slicing circular `Dseq` objects we get linear sequences." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-4)\n", + "atct\n", + "taga" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circular_seq = Dseq(\"aatctaa\", circular=True)\n", + "circular_seq[1:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can slice circular sequences across the origin (where index is zero) if the first index is bigger than the second index. This is demonstrated in the example below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-4)\n", + "aaaa\n", + "tttt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circular_seq[5:2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Printing sequences to the console: `__repr__` and `__str__`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`__repr__` and `__str__` are methods present in all python classes that return a string representation of an object. `__str__` is called by the `print` function, and `__repr__` is used by the console or notebook output when the object is not assigned to a variable. 
Below is an example with a `date` object:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> print statement: 2023-08-15\n", + "> repr: datetime.date(2023, 8, 15)\n", + "> repr from class method: datetime.date(2023, 8, 15)\n", + "\n", + "> console output:\n" + ] }, - "nbformat": 4, - "nbformat_minor": 2 + { + "data": { + "text/plain": [ + "datetime.date(2023, 8, 15)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import datetime\n", + "\n", + "my_date = datetime.date(2023, 8, 15)\n", + "\n", + "print('> print statement:', my_date)\n", + "print('> repr:', repr(my_date))\n", + "print('> repr from class method:', my_date.__repr__())\n", + "\n", + "print()\n", + "print('> console output:')\n", + "my_date" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In a similar way, `__repr__` and `__str__` methods are used by pydna to represent sequences as strings for different purposes:\n", + "\n", + "* `__repr__` is used to make a figure-like representation that shows both strands and the overhangs.\n", + "* `__str__` is used to return the entire sequence as a string of characters (from the left-most to the right-most base of both strands), the way we would store it in a FASTA file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> figure-like representation:\n", + " Dseq(-8)\n", + "aaatta\n", + " aattt\n", + "\n", + "> string representation:\n", + " aaattaaa\n" + ] + } + ], + "source": [ + "my_seq = Dseq.from_full_sequence_and_overhangs(\"aaattaaa\", crick_ovhg=-3, watson_ovhg=-2)\n", + "print('> figure-like representation:\\n', my_seq.__repr__())\n", + "print()\n", + "print('> string representation:\\n', my_seq)\n" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Note that on the string representation, the bases correspond to the entire sequence provided, even when they are only present on either the `watson` or `crick` strand. In the example above, the last two `aa` bases are missing from the `watson` strand, and that only the `crick` strand has them." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Edge cases\n", + "\n", + "You can create arbitrary double-stranded sequences that are not complementary if you specify both strands and an overhang, but you won't be able to use them for molecular biology simulations. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseq(-6)\n", + " xxxx\n", + "tata" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Dseq(\"xxxx\", \"atat\", ovhg=2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/Dseq_Features.ipynb b/docs/notebooks/Dseq_Features.ipynb index 9e0e7fb2..61dc12cb 100644 --- a/docs/notebooks/Dseq_Features.ipynb +++ b/docs/notebooks/Dseq_Features.ipynb @@ -1,818 +1,818 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Working with Features using the Dseqrecord class\n", - "\n", - "> Before working with features, check how to import sequences from files in the [Importing_Seqs notebook](./Importing_Seqs.ipynb).\n", - ">\n", - "> For full library documentation, visit [here](https://pydna-group.github.io/pydna/).\n", - "\n", - "Some 
sequence file formats (like Genbank) include features, describing key biological properties of sequence regions. In Genbank, features \"include genes, gene products, as well as regions of biological significance reported in the sequence.\" (See [here](https://www.ncbi.nlm.nih.gov/genbank/samplerecord/) for a description of a Genbank file and associated terminologies/annotations) Examples include coding sequences (CDS), introns, promoters, etc.\n", - "\n", - "pydna offers many ways to easily view, add, extract, and write features into a Genbank file via the `Dseqrecord` class. After reading a file into a `Dseqrecord` object, we can check out the list of features in the record using the following code. This example uses the sample record [U49845](https://www.ncbi.nlm.nih.gov/genbank/samplerecord/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: source\n", - "location: [0:5028](+)\n", - "qualifiers:\n", - " Key: chromosome, Value: ['IX']\n", - " Key: db_xref, Value: ['taxon:4932']\n", - " Key: mol_type, Value: ['genomic DNA']\n", - " Key: organism, Value: ['Saccharomyces cerevisiae']\n", - "\n", - "type: mRNA\n", - "location: [<0:>206](+)\n", - "qualifiers:\n", - " Key: product, Value: ['TCP1-beta']\n", - "\n", - "type: CDS\n", - "location: [<0:206](+)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['3']\n", - " Key: product, Value: ['TCP1-beta']\n", - " Key: protein_id, Value: ['AAA98665.1']\n", - " Key: translation, Value: 
['SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM']\n", - "\n", - "type: gene\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - "\n", - "type: mRNA\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - " Key: product, Value: ['Axl2p']\n", - "\n", - "type: CDS\n", - "location: [686:3158](+)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['1']\n", - " Key: gene, Value: ['AXL2']\n", - " Key: note, Value: ['plasma membrane glycoprotein']\n", - " Key: product, Value: ['Axl2p']\n", - " Key: protein_id, Value: ['AAA98666.1']\n", - " Key: translation, Value: ['MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML']\n", - "\n", - "type: gene\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - "\n", - "type: mRNA\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - " Key: product, Value: ['Rev7p']\n", - "\n", - "type: CDS\n", - "location: [3299:4037](-)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['1']\n", - " Key: gene, Value: ['REV7']\n", - " Key: product, Value: ['Rev7p']\n", - " Key: protein_id, Value: ['AAA98667.1']\n", 
- " Key: translation, Value: ['MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF']\n", - "\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.parsers import parse\n", - "\n", - "#Import your file into python. \n", - "file_path = \"./U49845.gb\"\n", - "records = parse(file_path)\n", - "sample_record = records[0]\n", - "\n", - "# List all features\n", - "for feature in sample_record.features:\n", - " print(feature)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Additional ways to view and search for particular features are shown at the bottom of the page under \"Other Methods to Viewing Features\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Adding Features and Qualifiers\n", - "\n", - "To add new feature to describe a region of interest to a record, for instance a region that you would like to perform a PCR, you need to create a `SeqFeature` (sequence feature). The minimal information required is:\n", - "* A `FeatureLocation`: position of the feature in the sequence.\n", - "* The `type` of feature you want to add.\n", - "\n", - "\n", - "🚨🚨 **VERY IMPORTANT** 🚨🚨. Note that `FeatureLocation`s are like python ranges (zero-based open intervals), whereas in GenBank files, locations are one-based closed intervals. For instance, the following code adds a new feature from the 2nd to the 5th nucleotide (`FeatureLocation(3, 15)`), of the `gene` type, but in the GenBank file will be represented as `4..15`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: gene\n", - "location: [3:15]\n", - "qualifiers:\n", - "\n", - "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 4..15\n", - "ORIGIN\n", - " 1 aaaatgcgta cgtgaacgt\n", - "//\n" - ] - } - ], - "source": [ - "from Bio.SeqFeature import FeatureLocation, SeqFeature\n", - "\n", - "# Create a dummy record\n", - "dummy_record = Dseqrecord(\"aaaATGCGTACGTGAacgt\")\n", - "\n", - "# Define the locations of a CDS\n", - "location = FeatureLocation(3, 15)\n", - "\n", - "# Create a SeqFeature with the type mRNA\n", - "my_feature = SeqFeature(location=location, type=\"gene\")\n", - "\n", - "# Add my_feature to dummy_record with .append\n", - "dummy_record.features.append(my_feature)\n", - "\n", - "# Confirm that my_feature has been added\n", - "print(dummy_record.features[-1])\n", - "\n", - "# Print the feature in GenBank format (see how the location is `4..15`)\n", - "print(dummy_record.format(\"genbank\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To give further information about a feature, we can add a qualifier using the `qualifiers` property of `SeqFeature`, which contains a dictionary of qualifiers. For instance, if I would like to note a new feature of type 'domain', between 3-9 bases as my region of interest, I can instantiate the `SeqFeature` class object as such.\n", - "\n", - "> Note that a new feature is always added to the last position of the features list." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">> Feature was added:\n", - "type: domain\n", - "location: [3:9]\n", - "qualifiers:\n", - " Key: Note, Value: ['Region of interest']\n", - "\n", - "\n", - ">> GenBank format:\n", - "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 4..15\n", - " domain 4..9\n", - " /Note=\"Region of interest\"\n", - "ORIGIN\n", - " 1 aaaatgcgta cgtgaacgt\n", - "//\n" - ] - } - ], - "source": [ - "location = FeatureLocation(3, 9)\n", - "\n", - "# Create a SeqFeature with a qualifier\n", - "my_feature2 = SeqFeature(location=location, type=\"domain\", qualifiers={\"Note\": [\"Region of interest\"]})\n", - "\n", - "# Add my_feature to my_record with .append\n", - "dummy_record.features.append(my_feature2)\n", - "\n", - "# Confirm that my_feature has been added\n", - "print('>> Feature was added:')\n", - "print(dummy_record.features[-1])\n", - "print()\n", - "\n", - "# Print the feature in GenBank format\n", - "print('>> GenBank format:')\n", - "print(dummy_record.format(\"genbank\"))\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**🤔 Best practices for qualifiers:**\n", - "\n", - "The values in the `qualifiers` dictionary should be lists. The reason for this is that in a GenBank file, a single feature can have multiple values for a single qualifier. Below is a real world of the ase1 CDS example from the _S. 
pombe_ genome in EMBL format:\n", - "\n", - "```\n", - "FT CDS join(1878362..1878785,1878833..1880604)\n", - "FT /colour=2\n", - "FT /primary_name=\"ase1\"\n", - "FT /product=\"antiparallel microtubule cross-linking factor\n", - "FT Ase1\"\n", - "FT /systematic_id=\"SPAPB1A10.09\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in eukaryotes; date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in metazoa; date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in vertebrates; date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution,\n", - "FT predominantly single copy (one to one); date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in fungi; date=20081110\"\n", - "FT /controlled_curation=\"term=species distribution, conserved\n", - "FT in eukaryotes only; date=20081110\"\n", - "```\n", - "\n", - "Note how there are several `controlled_curation` qualifiers, therefore it makes sense to store them as a list.\n", - "\n", - "By default, you can add any type of object in the qualifiers dictionary, and most things will work if you add a string. However, you risk overwriting the existing value for a qualifier, so best practice is:\n", - "1. Check if the qualifier already exists using `if \"qualifier_name\" in feature.qualifiers`\n", - "2. If it exists, append to the existing list of values using `feature.qualifiers[\"qualifier_name\"].append(\"new_value\")`\n", - "3. If it does not exist, add it to the qualifiers dictionary using `feature.qualifiers[\"qualifier_name\"] = [\"new_value\"]`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that `Bio.SeqFeatures` does not automatically assume a sequence strand for the feature. 
If you would like to refer to a feature on the positive or minus strand, you can add a parameter in `FeatureLocation` specifying `strand=+1` or `strand=-1`. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: domain\n", - "location: [15:19](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['example_domain']\n", - "\n", - "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 4..15\n", - " domain 4..9\n", - " /Note=\"Region of interest\"\n", - " domain complement(16..19)\n", - " /gene=\"example_domain\"\n", - "ORIGIN\n", - " 1 aaaatgcgta cgtgaacgt\n", - "//\n" - ] - } - ], - "source": [ - "#Create a location specifying the minus strand\n", - "location = FeatureLocation(15, 19, strand=-1)\n", - "\n", - "my_feature3 = SeqFeature(location=location, type=\"domain\", qualifiers={\"gene\":[\"example_domain\"]})\n", - "\n", - "dummy_record.features.append(my_feature3)\n", - "\n", - "print(dummy_record.features[-1])\n", - "\n", - "print(dummy_record.format(\"genbank\"))\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Adding a Feature with Parts\n", - "\n", - "To add a feature with parts, like a CDS with introns, we need to use a `CompoundLocation` object when creating a `SeqFeature`.\n", - "\n", - "The example code below adds a CDS with two parts, between 3-9bp and 12-15bp, to my features list. 
In a real-world scenario this would represent a CDS with an intron that skips the `ACG` codon: ATGCGT~~ACG~~TGA" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: CDS\n", - "location: join{[3:9], [12:15]}\n", - "qualifiers:\n", - " Key: gene, Value: ['example_gene']\n", - "\n", - "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 4..15\n", - " domain 4..9\n", - " /Note=\"Region of interest\"\n", - " domain complement(16..19)\n", - " /gene=\"example_domain\"\n", - " CDS join(4..9,13..15)\n", - " /gene=\"example_gene\"\n", - "ORIGIN\n", - " 1 aaaatgcgta cgtgaacgt\n", - "//\n" - ] - } - ], - "source": [ - "from Bio.SeqFeature import CompoundLocation\n", - "\n", - "# Define the locations of the CDS\n", - "locations = [FeatureLocation(3, 9), FeatureLocation(12, 15)]\n", - "\n", - "# Create a compound location from these parts\n", - "compound_location = CompoundLocation(locations)\n", - "\n", - "# Create a SeqFeature with this compound location, including type and qualifiers. 
\n", - "cds_feature = SeqFeature(location=compound_location, type=\"CDS\", qualifiers={\"gene\": [\"example_gene\"]})\n", - "\n", - "# Add the feature to the Dseqrecord\n", - "dummy_record.features.append(cds_feature)\n", - "\n", - "print(dummy_record.features[-1])\n", - "\n", - "print(dummy_record.format(\"genbank\"))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can even extract a protein record as follows (see how the protein sequence is `MR`, skipping the intron):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "ProteinSeq('MR')\n" - ] - } - ], - "source": [ - "sub_record = dummy_record.features[-1].extract(dummy_record)\n", - "\n", - "print(sub_record.translate())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Standard Feature Types and Qualifiers\n", - "\n", - "`pydna` and `Bio.SeqFeature` suppports all the conventional feature types through the `type` parameters. A non-exhaustive list include gene, CDS, promoter, exon, intron, 5' UTR, 3' UTR, terminator, enhancer, and RBS. You can also define custom features, which could be useful for synthetic biology applications. For instance, you might want to have Bio_brick or spacer features to describe a synthetic standardised plasmid construct. \n", - "\n", - "It is important to note that while `pydna` and `Bio.SeqFeature` does not restrict the feature types you can use, sticking to standard types helps maintain compatibility with other bioinformatics tools and databases. Please refer to the official [GenBank_Feature_Table](https://www.insdc.org/submitting-standards/feature-table/#2), that lists the standard feature types and their associated qualifiers." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Further documentation for `SeqFeature`, `CompoundLocation`, and `FeatureLocation` can be found in the `SeqFeature` module [here](https://biopython.org/docs/1.75/api/Bio.SeqFeature.html). " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Handling Origin Spanning Features\n", - "\n", - "An origin spanning feature is a special type of feature that crosses over a circular sequence's origin. In pydna, such a feature is represented as a feature with parts, joining the part of the sequence immediately before the origin and immediately after the origin. They can be added using `CompoundLocation` as normal. \n", - "\n", - "An origin spanning feature, between base 19 to base 6, in a 25bp long circular sequence, is represented like so: \n", - "\n", - "```\n", - "type: gene \n", - "location: join{[19:25](+), [0:6](+)} \n", - "qualifiers: gene, Value: example_gene \n", - "```\n", - "\n", - "This feature will be displayed as a single feature in SnapGene viewer and Benchling, since they support this convention." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">> Feature:\n", - "type: misc\n", - "location: join{[19:25], [0:6]}\n", - "qualifiers:\n", - " Key: gene, Value: ['example origin spanning gene']\n", - "\n", - ">> Feature sequence:\n", - "ATGCGTACGTGA\n", - "\n", - ">> GenBank format:\n", - "LOCUS name 25 bp DNA circular UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " misc join(20..25,1..6)\n", - " /gene=\"example origin spanning gene\"\n", - "ORIGIN\n", - " 1 acgtgaaaaa aaaaaaaaaa tgcgt\n", - "//\n" - ] - } - ], - "source": [ - "circular_record = Dseqrecord('ACGTGAaaaaaaaaaaaaaATGCGT', circular=True)\n", - "\n", - "location = [FeatureLocation(19,25), FeatureLocation(0, 6)]\n", - "ori_feat_location = CompoundLocation(location)\n", - "ori_feature = SeqFeature(location=ori_feat_location, type=\"misc\", qualifiers={\"gene\": [\"example origin spanning gene\"]})\n", - "circular_record.features.append(ori_feature)\n", - "\n", - "print('>> Feature:')\n", - "print(circular_record.features[-1])\n", - "\n", - "# Note how the feature sequence is extracted properly across the origin.\n", - "print('>> Feature sequence:')\n", - "print(circular_record.features[-1].extract(circular_record).seq)\n", - "print()\n", - "\n", - "print('>> GenBank format:')\n", - "print(circular_record.format(\"genbank\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Other Methods to Viewing Features\n", - "\n", - "pydna also provides the `list_features` method as a simple way to list all the features in a `Dseqrecord` object. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-----+------------------+-----+-------+-------+------+--------+------+\n", - "| Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |\n", - "+-----+------------------+-----+-------+-------+------+--------+------+\n", - "| 0 | nd | --> | 0 | 5028 | 5028 | source | no |\n", - "| 1 | nd | --> | <0 | >206 | 206 | mRNA | no |\n", - "| 2 | nd | --> | <0 | 206 | 206 | CDS | no |\n", - "| 3 | nd | --> | <686 | >3158 | 2472 | gene | yes |\n", - "| 4 | nd | --> | <686 | >3158 | 2472 | mRNA | yes |\n", - "| 5 | N:plasma membran | --> | 686 | 3158 | 2472 | CDS | yes |\n", - "| 6 | nd | <-- | <3299 | >4037 | 738 | gene | yes |\n", - "| 7 | nd | <-- | <3299 | >4037 | 738 | mRNA | yes |\n", - "| 8 | nd | <-- | 3299 | 4037 | 738 | CDS | yes |\n", - "+-----+------------------+-----+-------+-------+------+--------+------+\n" - ] - } - ], - "source": [ - "print(sample_record.list_features())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This method is convenient for checking-out a brief overview of each feature, without reading through an entire sequence record.\n", - "\n", - "Alternatively, we can look for specific features using their qualifiers. 
For instance:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Getting all CDS features:\n", - "type: CDS\n", - "location: [<0:206](+)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['3']\n", - " Key: product, Value: ['TCP1-beta']\n", - " Key: protein_id, Value: ['AAA98665.1']\n", - " Key: translation, Value: ['SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM']\n", - "\n", - "type: CDS\n", - "location: [686:3158](+)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['1']\n", - " Key: gene, Value: ['AXL2']\n", - " Key: note, Value: ['plasma membrane glycoprotein']\n", - " Key: product, Value: ['Axl2p']\n", - " Key: protein_id, Value: ['AAA98666.1']\n", - " Key: translation, Value: ['MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML']\n", - "\n", - "type: CDS\n", - "location: [3299:4037](-)\n", - "qualifiers:\n", - " Key: codon_start, Value: ['1']\n", - " Key: gene, Value: ['REV7']\n", - " Key: product, Value: ['Rev7p']\n", - " Key: protein_id, Value: ['AAA98667.1']\n", - " Key: translation, Value: 
['MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF']\n", - "\n" - ] - } - ], - "source": [ - "# Filter based on feature type\n", - "print('Getting all CDS features:')\n", - "cds_features = [f for f in sample_record.features if f.type == \"CDS\"]\n", - "for feature in cds_features:\n", - " print(feature)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: gene\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - "\n" - ] - } - ], - "source": [ - "# Find a particular feature by its qualifier (e.g. gene name)\n", - "rev7_cds_feature = next(f for f in sample_record.features if \n", - " f.type == \"gene\" and\n", - " \"gene\" in f.qualifiers and \"REV7\" in f.qualifiers[\"gene\"]\n", - " )\n", - "\n", - "print(rev7_cds_feature)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you would like to search for another type of features, simply replace the `\"gene\"` with your desired feature type in quotation marks." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Removing Features\n", - "\n", - "In pydna, we can search for the feature that we would like to remove using the feature's types or qualififers. 
For instance, we can modify the features list to exclude all CDS:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: source\n", - "location: [0:5028](+)\n", - "qualifiers:\n", - " Key: chromosome, Value: ['IX']\n", - " Key: db_xref, Value: ['taxon:4932']\n", - " Key: mol_type, Value: ['genomic DNA']\n", - " Key: organism, Value: ['Saccharomyces cerevisiae']\n", - "\n", - "type: mRNA\n", - "location: [<0:>206](+)\n", - "qualifiers:\n", - " Key: product, Value: ['TCP1-beta']\n", - "\n", - "type: gene\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - "\n", - "type: mRNA\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - " Key: product, Value: ['Axl2p']\n", - "\n", - "type: gene\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - "\n", - "type: mRNA\n", - "location: [<3299:>4037](-)\n", - "qualifiers:\n", - " Key: gene, Value: ['REV7']\n", - " Key: product, Value: ['Rev7p']\n", - "\n" - ] - } - ], - "source": [ - "#Remove all CDS type features from my feature list\n", - "sample_record.features = [f for f in sample_record.features if not (f.type == \"CDS\")]\n", - "\n", - "for feature in sample_record.features:\n", - " print(feature)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also modify the features list to exclude a specific gene:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type: source\n", - "location: [0:5028](+)\n", - "qualifiers:\n", - " Key: chromosome, Value: ['IX']\n", - " Key: db_xref, Value: ['taxon:4932']\n", - " Key: mol_type, Value: ['genomic DNA']\n", - " Key: organism, Value: ['Saccharomyces cerevisiae']\n", - "\n", - "type: mRNA\n", - "location: 
[<0:>206](+)\n", - "qualifiers:\n", - " Key: product, Value: ['TCP1-beta']\n", - "\n", - "type: gene\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - "\n", - "type: mRNA\n", - "location: [<686:>3158](+)\n", - "qualifiers:\n", - " Key: gene, Value: ['AXL2']\n", - " Key: product, Value: ['Axl2p']\n", - "\n" - ] - } - ], - "source": [ - "#Exclude REV7 from my feature list\n", - "sample_record.features = [f for f in sample_record.features if not ('gene' in f.qualifiers and 'REV7' in f.qualifiers['gene'])]\n", - "\n", - "for feature in sample_record.features:\n", - " print(feature)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Working with Features using the Dseqrecord class\n", + "\n", + "> Before working with features, check how to import sequences from files in the [Importing_Seqs notebook](./Importing_Seqs.ipynb).\n", + ">\n", + "> For full library documentation, visit [here](https://pydna-group.github.io/pydna/).\n", + "\n", + "Some sequence file formats (like Genbank) include features, describing key biological properties of sequence regions. 
In Genbank, features \"include genes, gene products, as well as regions of biological significance reported in the sequence.\" (See [here](https://www.ncbi.nlm.nih.gov/genbank/samplerecord/) for a description of a Genbank file and associated terminologies/annotations) Examples include coding sequences (CDS), introns, promoters, etc.\n", + "\n", + "pydna offers many ways to easily view, add, extract, and write features into a Genbank file via the `Dseqrecord` class. After reading a file into a `Dseqrecord` object, we can check out the list of features in the record using the following code. This example uses the sample record [U49845](https://www.ncbi.nlm.nih.gov/genbank/samplerecord/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: source\n", + "location: [0:5028](+)\n", + "qualifiers:\n", + " Key: chromosome, Value: ['IX']\n", + " Key: db_xref, Value: ['taxon:4932']\n", + " Key: mol_type, Value: ['genomic DNA']\n", + " Key: organism, Value: ['Saccharomyces cerevisiae']\n", + "\n", + "type: mRNA\n", + "location: [<0:>206](+)\n", + "qualifiers:\n", + " Key: product, Value: ['TCP1-beta']\n", + "\n", + "type: CDS\n", + "location: [<0:206](+)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['3']\n", + " Key: product, Value: ['TCP1-beta']\n", + " Key: protein_id, Value: ['AAA98665.1']\n", + " Key: translation, Value: ['SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM']\n", + "\n", + "type: gene\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: 
['AXL2']\n", + "\n", + "type: mRNA\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + " Key: product, Value: ['Axl2p']\n", + "\n", + "type: CDS\n", + "location: [686:3158](+)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['1']\n", + " Key: gene, Value: ['AXL2']\n", + " Key: note, Value: ['plasma membrane glycoprotein']\n", + " Key: product, Value: ['Axl2p']\n", + " Key: protein_id, Value: ['AAA98666.1']\n", + " Key: translation, Value: ['MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML']\n", + "\n", + "type: gene\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + "\n", + "type: mRNA\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + " Key: product, Value: ['Rev7p']\n", + "\n", + "type: CDS\n", + "location: [3299:4037](-)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['1']\n", + " Key: gene, Value: ['REV7']\n", + " Key: product, Value: ['Rev7p']\n", + " Key: protein_id, Value: ['AAA98667.1']\n", + " Key: translation, Value: 
['MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF']\n", + "\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.parsers import parse\n", + "\n", + "#Import your file into python. \n", + "file_path = \"./U49845.gb\"\n", + "records = parse(file_path)\n", + "sample_record = records[0]\n", + "\n", + "# List all features\n", + "for feature in sample_record.features:\n", + " print(feature)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Additional ways to view and search for particular features are shown at the bottom of the page under \"Other Methods to Viewing Features\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Adding Features and Qualifiers\n", + "\n", + "To add a new feature to describe a region of interest to a record, for instance a region that you would like to amplify by PCR, you need to create a `SeqFeature` (sequence feature). The minimal information required is:\n", + "* A `FeatureLocation`: position of the feature in the sequence.\n", + "* The `type` of feature you want to add.\n", + "\n", + "\n", + "🚨🚨 **VERY IMPORTANT** 🚨🚨. Note that `FeatureLocation`s are like python ranges (zero-based half-open intervals), whereas in GenBank files, locations are one-based closed intervals. For instance, the following code adds a new feature from the 4th to the 15th nucleotide (`FeatureLocation(3, 15)`), of the `gene` type, but in the GenBank file will be represented as `4..15`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: gene\n", + "location: [3:15]\n", + "qualifiers:\n", + "\n", + "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 4..15\n", + "ORIGIN\n", + " 1 aaaatgcgta cgtgaacgt\n", + "//\n" + ] + } + ], + "source": [ + "from Bio.SeqFeature import FeatureLocation, SeqFeature\n", + "\n", + "# Create a dummy record\n", + "dummy_record = Dseqrecord(\"aaaATGCGTACGTGAacgt\")\n", + "\n", + "# Define the location of the feature\n", + "location = FeatureLocation(3, 15)\n", + "\n", + "# Create a SeqFeature with the type gene\n", + "my_feature = SeqFeature(location=location, type=\"gene\")\n", + "\n", + "# Add my_feature to dummy_record with .append\n", + "dummy_record.features.append(my_feature)\n", + "\n", + "# Confirm that my_feature has been added\n", + "print(dummy_record.features[-1])\n", + "\n", + "# Print the feature in GenBank format (see how the location is `4..15`)\n", + "print(dummy_record.format(\"genbank\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To give further information about a feature, we can add a qualifier using the `qualifiers` property of `SeqFeature`, which contains a dictionary of qualifiers. For instance, if I would like to note a new feature of type 'domain', between 3-9 bases as my region of interest, I can instantiate the `SeqFeature` class object as such.\n", + "\n", + "> Note that a new feature is always added to the last position of the features list."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">> Feature was added:\n", + "type: domain\n", + "location: [3:9]\n", + "qualifiers:\n", + " Key: Note, Value: ['Region of interest']\n", + "\n", + "\n", + ">> GenBank format:\n", + "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 4..15\n", + " domain 4..9\n", + " /Note=\"Region of interest\"\n", + "ORIGIN\n", + " 1 aaaatgcgta cgtgaacgt\n", + "//\n" + ] + } + ], + "source": [ + "location = FeatureLocation(3, 9)\n", + "\n", + "# Create a SeqFeature with a qualifier\n", + "my_feature2 = SeqFeature(location=location, type=\"domain\", qualifiers={\"Note\": [\"Region of interest\"]})\n", + "\n", + "# Add my_feature2 to dummy_record with .append\n", + "dummy_record.features.append(my_feature2)\n", + "\n", + "# Confirm that my_feature2 has been added\n", + "print('>> Feature was added:')\n", + "print(dummy_record.features[-1])\n", + "print()\n", + "\n", + "# Print the feature in GenBank format\n", + "print('>> GenBank format:')\n", + "print(dummy_record.format(\"genbank\"))\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**🤔 Best practices for qualifiers:**\n", + "\n", + "The values in the `qualifiers` dictionary should be lists. The reason for this is that in a GenBank file, a single feature can have multiple values for a single qualifier. Below is a real-world example, the ase1 CDS from the _S. 
pombe_ genome in EMBL format:\n", + "\n", + "```\n", + "FT CDS join(1878362..1878785,1878833..1880604)\n", + "FT /colour=2\n", + "FT /primary_name=\"ase1\"\n", + "FT /product=\"antiparallel microtubule cross-linking factor\n", + "FT Ase1\"\n", + "FT /systematic_id=\"SPAPB1A10.09\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in eukaryotes; date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in metazoa; date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in vertebrates; date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution,\n", + "FT predominantly single copy (one to one); date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in fungi; date=20081110\"\n", + "FT /controlled_curation=\"term=species distribution, conserved\n", + "FT in eukaryotes only; date=20081110\"\n", + "```\n", + "\n", + "Note how there are several `controlled_curation` qualifiers, therefore it makes sense to store them as a list.\n", + "\n", + "By default, you can add any type of object in the qualifiers dictionary, and most things will work if you add a string. However, you risk overwriting the existing value for a qualifier, so best practice is:\n", + "1. Check if the qualifier already exists using `if \"qualifier_name\" in feature.qualifiers`\n", + "2. If it exists, append to the existing list of values using `feature.qualifiers[\"qualifier_name\"].append(\"new_value\")`\n", + "3. If it does not exist, add it to the qualifiers dictionary using `feature.qualifiers[\"qualifier_name\"] = [\"new_value\"]`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that `Bio.SeqFeature` does not automatically assume a sequence strand for the feature. 
If you would like to refer to a feature on the positive or minus strand, you can add a parameter in `FeatureLocation` specifying `strand=+1` or `strand=-1`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: domain\n", + "location: [15:19](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['example_domain']\n", + "\n", + "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 4..15\n", + " domain 4..9\n", + " /Note=\"Region of interest\"\n", + " domain complement(16..19)\n", + " /gene=\"example_domain\"\n", + "ORIGIN\n", + " 1 aaaatgcgta cgtgaacgt\n", + "//\n" + ] + } + ], + "source": [ + "#Create a location specifying the minus strand\n", + "location = FeatureLocation(15, 19, strand=-1)\n", + "\n", + "my_feature3 = SeqFeature(location=location, type=\"domain\", qualifiers={\"gene\":[\"example_domain\"]})\n", + "\n", + "dummy_record.features.append(my_feature3)\n", + "\n", + "print(dummy_record.features[-1])\n", + "\n", + "print(dummy_record.format(\"genbank\"))\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Adding a Feature with Parts\n", + "\n", + "To add a feature with parts, like a CDS with introns, we need to use a `CompoundLocation` object when creating a `SeqFeature`.\n", + "\n", + "The example code below adds a CDS with two parts, between 3-9bp and 12-15bp, to my features list. 
In a real-world scenario this would represent a CDS with an intron that skips the `ACG` codon: ATGCGT~~ACG~~TGA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: CDS\n", + "location: join{[3:9], [12:15]}\n", + "qualifiers:\n", + " Key: gene, Value: ['example_gene']\n", + "\n", + "LOCUS name 19 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 4..15\n", + " domain 4..9\n", + " /Note=\"Region of interest\"\n", + " domain complement(16..19)\n", + " /gene=\"example_domain\"\n", + " CDS join(4..9,13..15)\n", + " /gene=\"example_gene\"\n", + "ORIGIN\n", + " 1 aaaatgcgta cgtgaacgt\n", + "//\n" + ] + } + ], + "source": [ + "from Bio.SeqFeature import CompoundLocation\n", + "\n", + "# Define the locations of the CDS\n", + "locations = [FeatureLocation(3, 9), FeatureLocation(12, 15)]\n", + "\n", + "# Create a compound location from these parts\n", + "compound_location = CompoundLocation(locations)\n", + "\n", + "# Create a SeqFeature with this compound location, including type and qualifiers. 
\n", + "cds_feature = SeqFeature(location=compound_location, type=\"CDS\", qualifiers={\"gene\": [\"example_gene\"]})\n", + "\n", + "# Add the feature to the Dseqrecord\n", + "dummy_record.features.append(cds_feature)\n", + "\n", + "print(dummy_record.features[-1])\n", + "\n", + "print(dummy_record.format(\"genbank\"))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can even extract a protein record as follows (see how the protein sequence is `MR`, skipping the intron):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "ProteinSeq('MR')\n" + ] + } + ], + "source": [ + "sub_record = dummy_record.features[-1].extract(dummy_record)\n", + "\n", + "print(sub_record.translate())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Standard Feature Types and Qualifiers\n", + "\n", + "`pydna` and `Bio.SeqFeature` support all the conventional feature types through the `type` parameter. A non-exhaustive list includes gene, CDS, promoter, exon, intron, 5' UTR, 3' UTR, terminator, enhancer, and RBS. You can also define custom features, which could be useful for synthetic biology applications. For instance, you might want to have Bio_brick or spacer features to describe a synthetic standardised plasmid construct. \n", + "\n", + "It is important to note that while `pydna` and `Bio.SeqFeature` do not restrict the feature types you can use, sticking to standard types helps maintain compatibility with other bioinformatics tools and databases. Please refer to the official [GenBank_Feature_Table](https://www.insdc.org/submitting-standards/feature-table/#2), which lists the standard feature types and their associated qualifiers."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Further documentation for `SeqFeature`, `CompoundLocation`, and `FeatureLocation` can be found in the `SeqFeature` module [here](https://biopython.org/docs/1.75/api/Bio.SeqFeature.html). " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Handling Origin Spanning Features\n", + "\n", + "An origin spanning feature is a special type of feature that crosses over a circular sequence's origin. In pydna, such a feature is represented as a feature with parts, joining the part of the sequence immediately before the origin and immediately after the origin. Such features can be added using `CompoundLocation` as normal. \n", + "\n", + "An origin spanning feature, from base 19 to base 6, in a 25bp long circular sequence, is represented like so: \n", + "\n", + "```\n", + "type: gene \n", + "location: join{[19:25](+), [0:6](+)} \n", + "qualifiers: gene, Value: example_gene \n", + "```\n", + "\n", + "This feature will be displayed as a single feature in SnapGene viewer and Benchling, since they support this convention."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">> Feature:\n", + "type: misc\n", + "location: join{[19:25], [0:6]}\n", + "qualifiers:\n", + " Key: gene, Value: ['example origin spanning gene']\n", + "\n", + ">> Feature sequence:\n", + "ATGCGTACGTGA\n", + "\n", + ">> GenBank format:\n", + "LOCUS name 25 bp DNA circular UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " misc join(20..25,1..6)\n", + " /gene=\"example origin spanning gene\"\n", + "ORIGIN\n", + " 1 acgtgaaaaa aaaaaaaaaa tgcgt\n", + "//\n" + ] + } + ], + "source": [ + "circular_record = Dseqrecord('ACGTGAaaaaaaaaaaaaaATGCGT', circular=True)\n", + "\n", + "location = [FeatureLocation(19,25), FeatureLocation(0, 6)]\n", + "ori_feat_location = CompoundLocation(location)\n", + "ori_feature = SeqFeature(location=ori_feat_location, type=\"misc\", qualifiers={\"gene\": [\"example origin spanning gene\"]})\n", + "circular_record.features.append(ori_feature)\n", + "\n", + "print('>> Feature:')\n", + "print(circular_record.features[-1])\n", + "\n", + "# Note how the feature sequence is extracted properly across the origin.\n", + "print('>> Feature sequence:')\n", + "print(circular_record.features[-1].extract(circular_record).seq)\n", + "print()\n", + "\n", + "print('>> GenBank format:')\n", + "print(circular_record.format(\"genbank\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other Methods for Viewing Features\n", + "\n", + "pydna also provides the `list_features` method as a simple way to list all the features in a `Dseqrecord` object. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----+------------------+-----+-------+-------+------+--------+------+\n", + "| Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |\n", + "+-----+------------------+-----+-------+-------+------+--------+------+\n", + "| 0 | nd | --> | 0 | 5028 | 5028 | source | no |\n", + "| 1 | nd | --> | <0 | >206 | 206 | mRNA | no |\n", + "| 2 | nd | --> | <0 | 206 | 206 | CDS | no |\n", + "| 3 | nd | --> | <686 | >3158 | 2472 | gene | yes |\n", + "| 4 | nd | --> | <686 | >3158 | 2472 | mRNA | yes |\n", + "| 5 | N:plasma membran | --> | 686 | 3158 | 2472 | CDS | yes |\n", + "| 6 | nd | <-- | <3299 | >4037 | 738 | gene | yes |\n", + "| 7 | nd | <-- | <3299 | >4037 | 738 | mRNA | yes |\n", + "| 8 | nd | <-- | 3299 | 4037 | 738 | CDS | yes |\n", + "+-----+------------------+-----+-------+-------+------+--------+------+\n" + ] + } + ], + "source": [ + "print(sample_record.list_features())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This method is convenient for checking-out a brief overview of each feature, without reading through an entire sequence record.\n", + "\n", + "Alternatively, we can look for specific features using their qualifiers. 
For instance:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Getting all CDS features:\n", + "type: CDS\n", + "location: [<0:206](+)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['3']\n", + " Key: product, Value: ['TCP1-beta']\n", + " Key: protein_id, Value: ['AAA98665.1']\n", + " Key: translation, Value: ['SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM']\n", + "\n", + "type: CDS\n", + "location: [686:3158](+)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['1']\n", + " Key: gene, Value: ['AXL2']\n", + " Key: note, Value: ['plasma membrane glycoprotein']\n", + " Key: product, Value: ['Axl2p']\n", + " Key: protein_id, Value: ['AAA98666.1']\n", + " Key: translation, Value: ['MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML']\n", + "\n", + "type: CDS\n", + "location: [3299:4037](-)\n", + "qualifiers:\n", + " Key: codon_start, Value: ['1']\n", + " Key: gene, Value: ['REV7']\n", + " Key: product, Value: ['Rev7p']\n", + " Key: protein_id, Value: ['AAA98667.1']\n", + " Key: translation, Value: 
['MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF']\n", + "\n" + ] + } + ], + "source": [ + "# Filter based on feature type\n", + "print('Getting all CDS features:')\n", + "cds_features = [f for f in sample_record.features if f.type == \"CDS\"]\n", + "for feature in cds_features:\n", + " print(feature)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: gene\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + "\n" + ] + } + ], + "source": [ + "# Find a particular feature by its qualifier (e.g. gene name)\n", + "rev7_cds_feature = next(f for f in sample_record.features if \n", + " f.type == \"gene\" and\n", + " \"gene\" in f.qualifiers and \"REV7\" in f.qualifiers[\"gene\"]\n", + " )\n", + "\n", + "print(rev7_cds_feature)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you would like to search for another type of feature, simply replace the `\"gene\"` with your desired feature type in quotation marks." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Removing Features\n", + "\n", + "In pydna, we can search for the feature that we would like to remove using the feature's type or qualifiers. 
For instance, we can modify the features list to exclude all CDS:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: source\n", + "location: [0:5028](+)\n", + "qualifiers:\n", + " Key: chromosome, Value: ['IX']\n", + " Key: db_xref, Value: ['taxon:4932']\n", + " Key: mol_type, Value: ['genomic DNA']\n", + " Key: organism, Value: ['Saccharomyces cerevisiae']\n", + "\n", + "type: mRNA\n", + "location: [<0:>206](+)\n", + "qualifiers:\n", + " Key: product, Value: ['TCP1-beta']\n", + "\n", + "type: gene\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + "\n", + "type: mRNA\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + " Key: product, Value: ['Axl2p']\n", + "\n", + "type: gene\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + "\n", + "type: mRNA\n", + "location: [<3299:>4037](-)\n", + "qualifiers:\n", + " Key: gene, Value: ['REV7']\n", + " Key: product, Value: ['Rev7p']\n", + "\n" + ] + } + ], + "source": [ + "#Remove all CDS type features from my feature list\n", + "sample_record.features = [f for f in sample_record.features if not (f.type == \"CDS\")]\n", + "\n", + "for feature in sample_record.features:\n", + " print(feature)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also modify the features list to exclude a specific gene:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: source\n", + "location: [0:5028](+)\n", + "qualifiers:\n", + " Key: chromosome, Value: ['IX']\n", + " Key: db_xref, Value: ['taxon:4932']\n", + " Key: mol_type, Value: ['genomic DNA']\n", + " Key: organism, Value: ['Saccharomyces cerevisiae']\n", + "\n", + "type: mRNA\n", + "location: 
[<0:>206](+)\n", + "qualifiers:\n", + " Key: product, Value: ['TCP1-beta']\n", + "\n", + "type: gene\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + "\n", + "type: mRNA\n", + "location: [<686:>3158](+)\n", + "qualifiers:\n", + " Key: gene, Value: ['AXL2']\n", + " Key: product, Value: ['Axl2p']\n", + "\n" + ] + } + ], + "source": [ + "#Exclude REV7 from my feature list\n", + "sample_record.features = [f for f in sample_record.features if not ('gene' in f.qualifiers and 'REV7' in f.qualifiers['gene'])]\n", + "\n", + "for feature in sample_record.features:\n", + " print(feature)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/Example_CRISPR.ipynb b/docs/notebooks/Example_CRISPR.ipynb index 6a3be063..58d37197 100644 --- a/docs/notebooks/Example_CRISPR.ipynb +++ b/docs/notebooks/Example_CRISPR.ipynb @@ -1,270 +1,270 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Implementation of Oligonucleotide-based CRISPR-Cas9 toolbox for efficient engineering of Komagataella phaffii \n", - "\n", - "\n", - "In this example we wanted to give a real life intuition on how to use the module in practice. \n", - "\n", - "For this purpose we have chosen to use the oligonucleotide-based CRISPR-Cas9 toolbox that i described \n", - "here by Strucko et al 2024, in the industrially relevant K. 
phaffi production organism: \n", - "\n", - "https://academic.oup.com/femsyr/article/doi/10.1093/femsyr/foae026/7740463?login=false " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from IPython.display import Image\n", - "Image(url=\"https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/femsyr/24/10.1093_femsyr_foae026/1/m_foae026fig3.jpeg?Expires=1730974846&Signature=iBKvkhkUn1823IljQ~1uFEnKO0VqWrwiXADvCwQLz6Yv8yDEAFkgt~tsLrXKFTmGYIq3ZINcj5a5yNgs4cP4NeCvRcQh7Ad~1ZejIwNrjqw51CJhGcZWPzz~NDr93QVLZZd2Re41cJNFKFmEu756KxrHQxwKTQe2QPMPfiKBvhvo8J28PERj3vNjZ3LQRsFp9qUPpdsZEyWIiNY92jsuy448YyuaGCgaC2ExGDLeuArTEJmq8gtb0QnTPV0dEdtoxIfZpgavdvO~QyqikjCLj6hebUYU1lH7StuS8oqCQE82CXO0IUcjYF6m2Lb0evXhqdLDQe90M-NrKjzNRmBA0A__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Figure 1. oligo assisted repair in K. phaffi. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "- Basically we can make two cuts in the genome, and repair it with an oligo (Figure 1A, 1B).\n", - "\n", - "\n", - "- We can start by loading in our target. Here we have integrated LAC12 in our K. phaffi strain but want to knock it out. 
\n", - "\n", - "\n", - "- Let's see how this can be implemented in pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import the gene we are going to work with" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: False\n", - "size: 7127\n", - "ID: X06997.1\n", - "Name: X06997\n", - "Description: Kluyveromyces lactis LAC12 gene for lactose permease\n", - "Number of features: 8\n", - "/molecule_type=DNA\n", - "/topology=linear\n", - "/data_file_division=PLN\n", - "/date=25-JUL-2016\n", - "/accessions=['X06997']\n", - "/sequence_version=1\n", - "/keywords=['lactose permease', 'unidentified reading frame']\n", - "/source=Kluyveromyces lactis\n", - "/organism=Kluyveromyces lactis\n", - "/taxonomy=['Eukaryota', 'Fungi', 'Dikarya', 'Ascomycota', 'Saccharomycotina', 'Saccharomycetes', 'Saccharomycetales', 'Saccharomycetaceae', 'Kluyveromyces']\n", - "/references=[Reference(title='Primary structure of the lactose permease gene from the yeast Kluyveromyces lactis. Presence of an unusual transcript structure', ...), Reference(title='Direct Submission', ...)]\n", - "/comment=the sequence submitted starts from the 5'end of LAC4 gene but goes\n", - "to the opposite direction; therefore, base number 1 is -1199 of\n", - "LAC4 gene; for LAC4 gene seq. see\n", - "Mol. Cell. Biol. 
(1987)7,4369-4376.\n", - "Dseq(-7127)\n", - "GCGA..TTCG\n", - "CGCT..AAGC\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.crispr import cas9, protospacer\n", - "from pydna.genbank import Genbank\n", - "\n", - "# initalize your favourite gene\n", - "gb = Genbank(\"myself@email.com\") # Tell Genbank who you are!\n", - "gene = gb.nucleotide(\"X06997\") # Kluyveromyces lactis LAC12 gene for lactose permease that have been integrated into K. phaffi\n", - "target_dseq = Dseqrecord(gene)\n", - "print(target_dseq)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next we have chosen some guides and can add them to our cas9 enzymes and simulate the cuts." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cutting with guide 1: (Dseqrecord(-135), Dseqrecord(-6992))\n", - "cutting with guide 2: (Dseqrecord(-6793), Dseqrecord(-334))\n" - ] - } - ], - "source": [ - "\n", - "# Choose guides\n", - "guides = [\"CCCTAAGTCCTTTGAAGATT\", \"TATTATTTTGAGGTGCTTTA\"]\n", - "\n", - "# Create an enzyme object with the protospacer\n", - "enzyme = cas9(guides[0])\n", - "\n", - "# Simulate the cut with enzyme1\n", - "print('cutting with guide 1:', target_dseq.cut(enzyme))\n", - "\n", - "# Create an enzyme from the protospacer\n", - "enzyme2 = cas9(guides[1])\n", - "\n", - "# Simulate the cut with enzyme2\n", - "print('cutting with guide 2:', target_dseq.cut(enzyme2))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With these guides I would be able to generate a stable KO with a repair 60/90mer oligo." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "My repair oligo for this experiment : AGGTGAACACACTCTGATGTAGTGCAGTCCCTAAGTCCTTTGAAGTTACGGACTCCTCGACCGATGCCCTTGAGAGCCTTCAACCCAGTC \n", - "My repair oligo for this experiment length : 90 \n" - ] - } - ], - "source": [ - "repair_oligo = target_dseq.cut(enzyme)[0][-45:]+target_dseq.cut(enzyme2)[-1][:45]\n", - "repair_oligo.name = 'My repair oligo for this experiment'\n", - "print(f'{repair_oligo.name} : {repair_oligo.seq} ')\n", - "print(f'{repair_oligo.name} length : {len(repair_oligo.seq)} ')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The final edit gene would look like this in a case of homologous recombination. \n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
name|45\n",
-                            "     \\/\n",
-                            "     /\\\n",
-                            "     45|My repair oligo for this experiment|45\n",
-                            "                                            \\/\n",
-                            "                                            /\\\n",
-                            "                                            45|name
" - ], - "text/plain": [ - "Contig(-469)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.assembly import Assembly\n", - "\n", - "my_KO = Assembly((target_dseq.cut(enzyme)[0],repair_oligo, target_dseq.cut(enzyme2)[-1]), limit = 20 )\n", - "my_assembly_KO, *rest = my_KO.assemble_linear()\n", - "my_assembly_KO" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Implementation of Oligonucleotide-based CRISPR-Cas9 toolbox for efficient engineering of Komagataella phaffii \n", + "\n", + "\n", + "In this example we wanted to give a real life intuition on how to use the module in practice. \n", + "\n", + "For this purpose we have chosen to use the oligonucleotide-based CRISPR-Cas9 toolbox that i described \n", + "here by Strucko et al 2024, in the industrially relevant K. 
phaffi production organism: \n", + "\n", + "https://academic.oup.com/femsyr/article/doi/10.1093/femsyr/foae026/7740463?login=false " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import Image\n", + "Image(url=\"https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/femsyr/24/10.1093_femsyr_foae026/1/m_foae026fig3.jpeg?Expires=1730974846&Signature=iBKvkhkUn1823IljQ~1uFEnKO0VqWrwiXADvCwQLz6Yv8yDEAFkgt~tsLrXKFTmGYIq3ZINcj5a5yNgs4cP4NeCvRcQh7Ad~1ZejIwNrjqw51CJhGcZWPzz~NDr93QVLZZd2Re41cJNFKFmEu756KxrHQxwKTQe2QPMPfiKBvhvo8J28PERj3vNjZ3LQRsFp9qUPpdsZEyWIiNY92jsuy448YyuaGCgaC2ExGDLeuArTEJmq8gtb0QnTPV0dEdtoxIfZpgavdvO~QyqikjCLj6hebUYU1lH7StuS8oqCQE82CXO0IUcjYF6m2Lb0evXhqdLDQe90M-NrKjzNRmBA0A__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Figure 1. oligo assisted repair in K. phaffi. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "- Basically we can make two cuts in the genome, and repair it with an oligo (Figure 1A, 1B).\n", + "\n", + "\n", + "- We can start by loading in our target. Here we have integrated LAC12 in our K. phaffi strain but want to knock it out. 
\n", + "\n", + "\n", + "- Let's see how this can be implemented in pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import the gene we are going to work with" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: False\n", + "size: 7127\n", + "ID: X06997.1\n", + "Name: X06997\n", + "Description: Kluyveromyces lactis LAC12 gene for lactose permease\n", + "Number of features: 8\n", + "/molecule_type=DNA\n", + "/topology=linear\n", + "/data_file_division=PLN\n", + "/date=25-JUL-2016\n", + "/accessions=['X06997']\n", + "/sequence_version=1\n", + "/keywords=['lactose permease', 'unidentified reading frame']\n", + "/source=Kluyveromyces lactis\n", + "/organism=Kluyveromyces lactis\n", + "/taxonomy=['Eukaryota', 'Fungi', 'Dikarya', 'Ascomycota', 'Saccharomycotina', 'Saccharomycetes', 'Saccharomycetales', 'Saccharomycetaceae', 'Kluyveromyces']\n", + "/references=[Reference(title='Primary structure of the lactose permease gene from the yeast Kluyveromyces lactis. Presence of an unusual transcript structure', ...), Reference(title='Direct Submission', ...)]\n", + "/comment=the sequence submitted starts from the 5'end of LAC4 gene but goes\n", + "to the opposite direction; therefore, base number 1 is -1199 of\n", + "LAC4 gene; for LAC4 gene seq. see\n", + "Mol. Cell. Biol. 
(1987)7,4369-4376.\n", + "Dseq(-7127)\n", + "GCGA..TTCG\n", + "CGCT..AAGC\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.crispr import cas9, protospacer\n", + "from pydna.genbank import Genbank\n", + "\n", + "# initialize your favourite gene\n", + "gb = Genbank(\"myself@email.com\") # Tell Genbank who you are!\n", + "gene = gb.nucleotide(\"X06997\") # Kluyveromyces lactis LAC12 gene for lactose permease that has been integrated into K. phaffii\n", + "target_dseq = Dseqrecord(gene)\n", + "print(target_dseq)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we have chosen some guides and can add them to our cas9 enzymes and simulate the cuts."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My repair oligo for this experiment : AGGTGAACACACTCTGATGTAGTGCAGTCCCTAAGTCCTTTGAAGTTACGGACTCCTCGACCGATGCCCTTGAGAGCCTTCAACCCAGTC \n", + "My repair oligo for this experiment length : 90 \n" + ] + } + ], + "source": [ + "repair_oligo = target_dseq.cut(enzyme)[0][-45:]+target_dseq.cut(enzyme2)[-1][:45]\n", + "repair_oligo.name = 'My repair oligo for this experiment'\n", + "print(f'{repair_oligo.name} : {repair_oligo.seq} ')\n", + "print(f'{repair_oligo.name} length : {len(repair_oligo.seq)} ')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The final edit gene would look like this in a case of homologous recombination. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
name|45\n",
+       "     \\/\n",
+       "     /\\\n",
+       "     45|My repair oligo for this experiment|45\n",
+       "                                            \\/\n",
+       "                                            /\\\n",
+       "                                            45|name
" + ], + "text/plain": [ + "Contig(-469)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.assembly import Assembly\n", + "\n", + "my_KO = Assembly((target_dseq.cut(enzyme)[0],repair_oligo, target_dseq.cut(enzyme2)[-1]), limit = 20 )\n", + "my_assembly_KO, *rest = my_KO.assemble_linear()\n", + "my_assembly_KO" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/Example_Gibson.ipynb b/docs/notebooks/Example_Gibson.ipynb index 03ca07c1..b8fe3a0e 100755 --- a/docs/notebooks/Example_Gibson.ipynb +++ b/docs/notebooks/Example_Gibson.ipynb @@ -1,267 +1,267 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example of a Gibson Assembly in pydna\n", - "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", - "\n", - "This example showcases a workflow of modelling Gibson assembly to clone gene fragments into plasmids for synthetic biology. The biological example is sourced [here](https://www.nature.com/articles/nmeth.1318#MOESM319), from the original Gibson assembly paper. This example constructs a synthetic pCC1BAC plasmid by joining sequence fragments from Ruminiclostridium (Clostridium) cellulolyticum. The R. cellulolyticum fragments joined are termed F1, F2, and F3, as in the paper.\n", - "\n", - "Source files can be found alongside this notebook, if you would like to follow along. 
Annotations are made alongside the code to describe key steps.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Importing all necessary classes and methods\n", - "\n", - "from pydna.parsers import parse\n", - "from pydna.tm import tm_default\n", - "from pydna.amplify import pcr\n", - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.assembly import Assembly\n", - "from pydna.genbank import Genbank\n", - "from pydna.gel import gel\n", - "from pydna.ladders import GeneRuler_1kb_plus\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'molecule_type': 'DNA',\n", - " 'topology': 'circular',\n", - " 'data_file_division': 'BCT',\n", - " 'date': '25-AUG-2017',\n", - " 'accessions': ['CP001348', 'AAVC01000000', 'AAVC01000001-AAVC01000121'],\n", - " 'sequence_version': 1,\n", - " 'keywords': [''],\n", - " 'source': 'Ruminiclostridium cellulolyticum H10',\n", - " 'organism': 'Ruminiclostridium cellulolyticum H10',\n", - " 'taxonomy': ['Bacteria',\n", - " 'Bacillota',\n", - " 'Clostridia',\n", - " 'Eubacteriales',\n", - " 'Oscillospiraceae',\n", - " 'Ruminiclostridium'],\n", - " 'references': [Reference(title='Complete sequence of Clostridium cellulolyticum H10', ...),\n", - " Reference(title='Direct Submission', ...)],\n", - " 'comment': 'URL -- http://www.jgi.doe.gov\\nJGI Project ID: 4002584\\nSource DNA and bacteria available from Jizhong Zhou\\n(jzhou@rccc.ou.edu)\\nContacts: Jizhong Zhou (jzhou@rccc.ou.edu)\\n David Bruce 
(microbe@cuba.jgi-psf.org)\\nAnnotation done by JGI-ORNL and JGI-PGF\\nFinishing done by JGI-LANL\\nFinished microbial genomes have been curated to close all gaps with\\ngreater than 98% coverage of at least two independent clones. Each\\nbase pair has a minimum q (quality) value of 30 and the total error\\nrate is less than one per 50000.\\nThe JGI and collaborators endorse the principles for the\\ndistribution and use of large scale sequencing data adopted by the\\nlarger genome sequencing community and urge users of this data to\\nfollow them. it is our intention to publish the work of this\\nproject in a timely fashion and we welcome collaborative\\ninteraction on the project and analysis.\\n(http://www.genome.gov/page.cfm?pageID=10506376).'}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Reading the R. cellulolyticum genome from GenBank\n", - "gb = Genbank(\"example@example.com\")\n", - "genome = gb.nucleotide(\"CP001348.1\")\n", - "# Print the info of the genome\n", - "genome.annotations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'molecule_type': 'DNA',\n", - " 'topology': 'circular',\n", - " 'data_file_division': 'SYN',\n", - " 'date': '29-AUG-2024',\n", - " 'accessions': ['.'],\n", - " 'keywords': [''],\n", - " 'source': 'synthetic DNA construct',\n", - " 'organism': 'synthetic DNA construct',\n", - " 'taxonomy': [],\n", - " 'references': [Reference(title='Direct Submission', ...),\n", - " Reference(title='Direct Submission', ...),\n", - " Reference(title='Direct Submission', ...),\n", - " Reference(title='Direct Submission', ...)],\n", - " 'comment': 'SGRef: number: 1; type: \"Journal Article\"; journalName: \"Submitted\\n(23-AUG-2007) 726 Post Road, Madison, WI 53713, USA\"\\nSGRef: number: 2; type: \"Journal Article\"\\nSGRef: number: 3; type: \"Journal Article\"'}" - ] - }, - "execution_count": 
null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Reading the plasmid\n", - "vector = parse(\"./pCC1BAC.gb\")[0]\n", - "vector.annotations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Importing pre-designed primers for the PylRS insert fragment. \n", - "\n", - "F1_For = \"GCAGCTTCAAGTCCTGCAAACAAGGTGTACCAGGATCGTT\"\n", - "F1_Rev = \"GATTTCAGTGTAGTTAGGGCCAGTTGAATTCAAACCTGCC\"\n", - "F2_For = \"GGCAGGTTTGAATTCAACTGGCCCTAACTACACTGAAATC\"\n", - "F2_Rev = \"CTTGGTGCCATCAGCATTGTTCTCTGTACCGCCCACTGTC\"\n", - "F3_For = \"GACAGTGGGCGGTACAGAGAACAATGCTGATGGCACCAAG\"\n", - "F3_Rev = \"CAGTTGAATAATCATGTGTTCCTGCGGCAAATGCAGTACC\"\n", - "BACF1_For = \"AACGATCCTGGTACACCTTGTTTGCAGGACTTGAAGCTGCgcggccgcgatcctctagagtcgacctg\"\n", - "BACF3_Rev = \"GGTACTGCATTTGCCGCAGGAACACATGATTATTCAACTGgcggccgccgggtaccgagctcgaattc\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5210\n", - "5384\n", - "5172\n", - "8221\n" - ] - } - ], - "source": [ - "# Getting the PCR products from the genome (might take a while since the genome is large)\n", - "\n", - "pcr_product_F1 = pcr(F1_For, F1_Rev, genome, limit=20)\n", - "pcr_product_F2 = pcr(F2_For, F2_Rev, genome, limit=20)\n", - "pcr_product_F3 = pcr(F3_For, F3_Rev, genome, limit=20)\n", - "pcr_product_BAC = pcr(BACF1_For, BACF3_Rev, vector, limit=20)\n", - "\n", - "# Printing out the PCR fragment sizes\n", - "print(len(pcr_product_F1))\n", - "print(len(pcr_product_F2))\n", - "print(len(pcr_product_F3))\n", - "print(len(pcr_product_BAC))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "image/jpeg": 
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAFoAlgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD5/ooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK6/wCH3hWx8WeIrfT7+W4jhlcKWgZQ36g1yFel/Bf/AJHey/66D+dAGH8QfCtj4T8RXGn2EtxJDE5UNOylv0ArkK9L+NH/ACO97/10P8680oAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK9L+C/wDyO9l/10H8680r0v4L/wDI72X/AF0H86AD40f8jve/9dD/ADrzSvS/jR/yO97/ANdD/OvNKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACvS/gv/wAjvZf9dB/OvNK9L+C//I72X/XQfzoAPjR/yO97/wBdD/OvNK9L+NH/ACO97/10P8680oAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK7n4Y+INL8PeKbW91W6+z26OCz+Wz4H0UE1w1FAHc/E7xBpfiHxTdXulXX2i3dyVfy2TI+jAGuGoooAKKKKACinxQyzuEijeRz0VFJJq7faDrOmI
j6hpN/aJINyNcWzxhh6jIGaAM+iiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8AIat/98V7X8b/APkCaN/16JXinhj/AJDVv/viva/jf/yBNG/69EoA+fD1NJSnqaSgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8hq3/3xXtfxv/5Amjf9eiV4p4Y/5DVv/viva/jf/wAgTRv+vRKAPnw9TSUp6mkoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooA2fDH/Iat/8AfFe1/G//AJAmjf8AXoleKeGP+Q1b/wC+K9r+N/8AyBNG/wCvRKAPnw9TSUp6mkoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAoooHWgCa1u57OdZrd9kinIOAf51t65448R+JIIYdW1H7RHAgSMeRGm1R2+VRXY/Dn4baN4v07ULjULm/ie2gaRBbyIoJHrlTXnusWMWn6jLbxM7IjEAuQTQBn0UUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUDrRQOtAH0H8EP8AkB6z/wBej14r4n/5Ddx/vmvavgh/yA9Z/wCvR68V8T/8hu4/3zQBjUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUDrRQOtAH0H8EP+QHrP/Xo9eK+J/wDkN3H++a9q+CH/ACA9Z/69HrxXxP8A8hu4/wB80AY1FFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFA60UDrQB9B/BD/kB6z/16PXivif/AJDdx/vmvSvhZ448OeG9K1OHVtR+zyT27JGPIkfcx7fKpry/XbuC81Saa3ffGzEg4I/nQBmUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFF
FABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAPihlncJFG8jnoqKSTV2+0HWdMRH1DSb+0SQbka4tnjDD1GQM1Z8Mf8hq3/3xXtfxv/5Amjf9eiUAfPdFKeppKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKANnwx/yGrf/AHxXtfxv/wCQJo3/AF6JXinhj/kNW/8Aviva/jf/AMgTRv8Ar0SgD58PU0lKeppKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKANnwx/yGrf8A3xXtfxv/AOQJo3/XoleKeGP+Q1b/AO+K9r+N/wDyBNG/69EoA+fD1NJSnqaSgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8AIat/98V7X8b/APkCaN/16JXgNrdz2c6zW77JFOQcA/zrb1zxx4j8SQQw6tqP2iOBAkY8iNNqjt8qigDnz1NJRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAKBk4qaeFYgpUnkZ5qFfvCrV391PpQBUqWCMSyBWJx7VFViz/wBePrQAyeMRSFVJx71FVi8/15+tV6ACrVtbJMrFiwwM8VVq/Y/6t/8AdoApONrECm06T/WGm0AFdV4b8NWesW1xJcSzqY0LDy2A5/EGuVr0PwN/x43v/XE0AcJeQLb3LxoSQDjmq9XNU/4/pP8AeNU6AJ7aFZpQrEgH0ouYVhlKqSQPWpLD/Xr9aL//AF7fWgCpRRRQBLBGJJApJx7VLeWyW74QsfrTLT/XD61a1P8A1n4UAZ1FFFABRRRQAVLbxLLIFYkA+lRVZs/9ev1oAfe2kds+ELH6mqdaeq/638KzKACiiigB0ahnAPevR9C+Huk6n4XutTmuL1Z4QCqo6BT9crn9a85h/wBav1r3Lwj/AMk/1H/dFAHid9bpbXLxoWIBxzVar+rf8f0n1qhQAUUUUAFFFFABWppemw3r7ZWkA/2SP8Ky66Dw/wD638KAMm9tktpiiFiB61VrQ1T/AI+W+tZ9ABRRRQAUUUUAaWl6fFezKkjOAT/CR/hRqmnxWUzJGzkA/wARH+FWvD//AB9J9aPEH/H0/wBaAMOiiigAooooA6zwZ4XsvEWpxW13LcIjsATCyg/qDXT/ABJ+GujeDrjy9Pub+UbQf9IkRu3soqn8Lv8AkPW/+8K7746f8fo/3B/KgD57YYYihRlgKV/vmhPvCgDodK0C1voHeWSYFVyNrD/Cse9tUtpWRCxA9a63w7/x6Tf7hrmNV/4+W+tAGdSqMsAaSnR/fFA
HS6R4cs7+3eSWScFVyNjAf0rF1C0jtZmRCxA/vGuy8Nf8eUv+4a5TWf8Aj6f60AZdFFFAGhplhFeTKkjOAT/CRW/rXhWx062jkhluGLLk72U/yFZWg/8AH0n1rsfFP/HjD/uCgDzaRQrkCm0+b/WGmUAKo3MAe9dt4V8Hafrk6JczXSBjz5TKP5qa4qP/AFi/WvVvh5/x9RfUUAc94q8Hafoc7pbTXThTx5rKf5KK4lhtYgdq9X+If/H1L9TXlMn+sb60ANooooAKKKKAFX7wq1d/dT6VVX7wq1d/dT6UAVKsWf8Arx9ar1Ys/wDXj60AF5/rz9ar1YvP9efrVegAq/Y/6t/92qFX7H/Vv/u0AUpP9YabTpP9YabQAV6H4G/48b3/AK4mvPK9D8Df8eN7/wBcTQBxGqf8f0n+8ap1c1T/AI/pP941ToAt2H+vX60X/wDr2+tFh/r1+tF//r2+tAFSiiigCe0/1w+tWtT/ANZ+FVbT/XD61a1P/WfhQBnUUUUAFFFFABVmz/16/Wq1WbP/AF6/WgC1qv8ArfwrMrT1X/W/hWZQAUUUUAPh/wBav1r3Lwj/AMk/1H/dFeGw/wCtX617l4R/5J/qP+6KAPGdW/4/pPrVCr+rf8f0n1qhQAUUUUAFFFFABXQeH/8AW/hXP10Hh/8A1v4UAZ+qf8fLfWs+tDVP+PlvrWfQAUUUUAFFFFAG54f/AOPpPrR4g/4+n+tHh/8A4+k+tHiD/j6f60AYdFFFABRRRQB6P8Lv+Q9b/wC8K7746f8AH6P9wfyrgfhd/wAh63/3hXffHT/j9H+4P5UAfPj/AHzQn3hQ/wB80J94UAdt4d/49Jv9w1zGq/8AHy31rp/Dv/HpN/uGuY1X/j5b60AZ1Oj++KbTo/vigDvfDX/HlL/uGuU1n/j6f611fhr/AI8pf9w1yms/8fT/AFoAy6KKKANrQf8Aj6T612Pin/jxh/3BXHaD/wAfSfWux8U/8eMP+4KAPN5v9YaZT5v9YaZQA6P/AFi/WvVvh5/x9RfUV5TH/rF+terfDz/j6i+ooAPiH/x9S/U15TJ/rG+terfEP/j6l+prymT/AFjfWgBtFFFABRRRQAq/eFWrv7qfSqq/eFWrv7qfSgCpViz/ANePrVerFn/rx9aAC8/15+tV6sXn+vP1qvQAVfsf9W/+7VCr9j/q3/3aAKUn+sNNp0n+sNNoAK9D8Df8eN7/ANcTXnleh+Bv+PG9/wCuJoA4jVP+P6T/AHjVOrmqf8f0n+8ap0AW7D/Xr9aL/wD17fWiw/16/Wi//wBe31oAqUUUUAT2n+uH1q1qf+s/Cqtp/rh9atan/rPwoAzqKKKACiiigAqzZ/69frVarNn/AK9frQBa1X/W/hWZWnqv+t/CsygAooooAfD/AK1frXuXhH/kn+o/7orw2H/Wr9a9y8I/8k/1H/dFAHjOrf8AH9J9aoVf1b/j+k+tUKACiiigAooooAK6Dw//AK38K5+ug8P/AOt/CgDP1T/j5b61n1oap/x8t9az6ACiiigAooooA3PD/wDx9J9aPEH/AB9P9aPD/wDx9J9aPEH/AB9P9aAMOiiigAooooA9H+F3/Iet/wDeFd98dP8Aj9H+4P5VwPwu/wCQ9b/7wrvvjp/x+j/cH8qAPnx/vmhPvCh/vmhPvCgDtvDv/HpN/uGuY1X/AI+W+tdP4d/49Jv9w1zGq/8AHy31oAzqdH98U2nR/fFAHe+Gv+PKX/cNcprP/H0/1rq/DX/HlL/uGuU1n/j6f60AZdFFFAG1oP8Ax9J9a7HxT/x4w/7grjtB/wCPpPrXY+Kf+PGH/cFAHm83+sNMp83+sNMoAdH/AKxfrXq3w8/4+ovqK8pj/wBYv1r1b4ef8fUX1FAB8Q/+PqX6mvKZP9Y31r1b4h/8fUv1NeUyf6xvrQA2iiigAooooAVThhU9xKjqu05wPSq9FABU1s6xyhmOBUNFAE1y6ySllORUNFFABVu1njj
Rg7YJHpVSigBzkFyR0ptFFABXZeFNd03TbS6S7ufLZ4yqjYxyfwFcbRQBZv5UmuneNsqTwcVWoooAsWkqRTBnOB9KLuVJZiyHI+lV6KACiiigCW3dUlBY4FWL6eOZ8xtkfQ1SooAKKKKACiiigAqa2kWOUFjgVDRQBf1C4inkzG+4fQiqFFFABRRRQA+NgrgnpmvVfDvjLQLDwfeWFzf7LmRQETyZDn8QuK8nooAt6hNHPdO8bblJ4OMVUoooAKKKKACiiigArY0e+trWTM0m0Y/uk/yrHooAuX88c87NG24E+mKp0UUAFFFFABRRRQBq6PeW9rcK00m1QeuCf5UaxeW91cM0Mm5SeuCP51lUUAFFFFABRRRQB2vgLXtM0XV4Z9QufJjVgS3ls38ga6/4r+OfDniW6D6RqP2ldoGfIkTt/tKK8booAViCxIoU4YE0lFAHVaLrFhaW8iTz7GK4A2Mf5CsLUJ4p52aNtwJ9CKpUUAFOQgMCabRQB1+h63p1nbSJPcbGKkAbGP8AIVz+p3MVxOzRPuBPoRVCigAooooA1NJu4LWdWmfaAeuCf5V0uv8AiDS720iS3ut7KgBHlsP5iuGooAfIwZyR0plFFADkIDgnpXoHg3xNpGk3EbXt35Sg8ny3b+QNee0UAeheMvE2katcSNZXfmqTwfLdf5gV5+5BckdKbRQAUUUUAFFFFABUzJbiyidZWNyZHEke3hUAXac9ySX4/wBketP057WPU7R76J5bRZkM8afeePcNwHI5IzXoGraZcWWi3V3qttYvpzwuLdbfw/JayLIVIjPmmFAAG2k5dsgEc5oA82oore8JJBNq7wPAstzLCVtDJatcokuVOWjUEsNocfdbkg4OKAMe6S3SVRbStJH5cZLMuCHKAuPoG3DPfFQ12njGyk07ToYNUgtv7TeYNFLa6S9ioiAbcGDRRbiSUx8pxg881xdABRRVmx06+1S4+z6fZXF3PjPl28TSNj1wATQBXZWRyjqVZTggjBBpKvaxb6pb6nM2sW1zb3szGWRbmIxuxYkk4IHU5qjQAUUVZsdOvtUuPs+n2Vxdz4z5dvE0jY9cAE0AV2Vkco6lWU4IIwQaSr2sW+qW+pzNrFtc297MxlkW5iMbsWJJOCB1Oao0AFKVZQpKkBhlSR1GccfkaWOOSaVYokZ5HIVVUZLE9gK09V0rXrC2tTq2mX9rBGnlQG5tmjGCzPgEgZ5Zj+NAGVRRRQAUu1ggcqdpJAOOCR1/mPzpOprXvNG8Q2ekRNe6VqMGno7SJJNaukYZwoJ3EY5Cr37UAZFFFFABRRRQAUUU6OOSaVYokZ5HIVVUZLE9gKAEKsoUlSAwypI6jOOPyNJWrqula9YW1qdW0y/tYI08qA3Ns0YwWZ8AkDPLMfxrKoAKKKKAClVWdwiKWZjgADJJpK0tFsNZu75J9Esby5ubZ1lU2sDSmNgcg4APcd6AM2iprq0uLK5e3u7eW3nQ4eKVCjL9QeRUNABRRRQAUUUUAFFFWbHTr7VLj7Pp9lcXc+M+XbxNI2PXABNAFdlZHKOpVlOCCMEGkq9rFvqlvqczaxbXNvezMZZFuYjG7FiSTggdTmqNABRRRQAUUUUAFFFTTpbrFbGGVnkaMmZSuAj72AA9RtCnPufSgCGiiigAooooAKKKmnS3WK2MMrPI0ZMylcBH3sAB6jaFOfc+lAENFFFABRU1mlvJewJdytFbNIolkVdxRM8kDuQM8VDQAUUUUAFFTWaW8l7Al3K0Vs0iiWRV3FEzyQO5AzxUNABRRRQAUUVMyW4sonWVjcmRxJHt4VAF2nPckl+P9ketAENFFFABRRRQAUUrKyHDKVOAcEY4IyKSgAooooAKKKKAL+iPdR6/pz2MSy3i3URgjf7rybhtB5HBOO9dlJHpMEGoz6JcQ3OqS2swniOpyygRlG8wqGt4w5C7iP3jdM84ritJTzdZsY/tf2PdcRr9pzjycsPnzkdOvXtXomqLfS6
TfrNo+qaPGLeRjqU9vbRpcYUnYXSBGO/G3iRslu4oA8vroPCTMLy+RxGLKS0K3kjzmHy4t6chlDEHdsGArZzjBzXP10fgtJW1edoXnaRLdmFrAEL3fzKPKAcMp67sFW+50JoAueIEsbfw35GhyR3GmG7Rp5ftjzOkux9gw8MW0Eb+Qpzt68VyFdz4yS5bRIpLjT73Rdtwqrp11DDF5uVbMoWOKLO3GCSp++MHqK4agArqPCohk07U4NRaOLSXeHz5muWhZZAH2KCschbI3nGw/dzxiuXrrvBCT7dQkt4bq/dfLU6ZbRxSNcA7vnKyJICEx/cY/OOnNAEPioQx6dpkGnNHLpKPN5Ey3LTM0hCb1JaOMrgbDjYPvZ5zXL113jdJ9mnyXEN1YO3mKNMuY4o2twNvzhY0jAD5/uKfkPXiuRoAK6jwqIZNO1ODUWji0l3h8+ZrloWWQB9igrHIWyN5xsP3c8Yrl667wQk+3UJLeG6v3Xy1OmW0cUjXAO75ysiSAhMf3GPzjpzQBD4qEMenaZBpzRy6SjzeRMty0zNIQm9SWjjK4Gw42D72ec1y9dd43SfZp8lxDdWDt5ijTLmOKNrcDb84WNIwA+f7in5D14rkaANjwtJcR+IYGtkjd9kofzJDGojMbCRtw5XCbjkdMdD0rf1KLS7Xw9qa+Hp4rlJUT7bm+klaNBIhDKrQQj7+wZ+bGe2c1geFkaTxHaKl09s43srxlQzEIx2Ddxl8bOePm5z0rrPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/8AiHynpwaAPO6KKKAL+iPdR6/pz2MSy3i3URgjf7rybhtB5HBOO9dlJHpMEGoz6JcQ3OqS2swniOpyygRlG8wqGt4w5C7iP3jdM84ritJTzdZsY/tf2PdcRr9pzjycsPnzkdOvXtXomqLfS6TfrNo+qaPGLeRjqU9vbRpcYUnYXSBGO/G3iRslu4oA8vooooAKKKKACtjwtJcR+IYGtkjd9kofzJDGojMbCRtw5XCbjkdMdD0rHrY8LI0niO0VLp7ZxvZXjKhmIRjsG7jL42c8fNznpQBv6lFpdr4e1NfD08VykqJ9tzfSStGgkQhlVoIR9/YM/NjPbOa4ivRPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/+IfKenBrzugAooooAK6/w+ljceG/I1ySO30wXbtBL9seF3l2JvGEhl3ADZyVGN3XmuQruPB1rey6fAbeVJrZ7m4NzayQQThdkSMhVJVYKzklN2McD0oAx/FrMbyxRBGbKO0C2ciTmbzIt78lmVSTu3jBVcYxgYrn66Hxe5m1Czud8wFxaJKtvMULWylmAT5FVQCAHACrw447nnqACiiigAooooAK6jwqIZNO1ODUWji0l3h8+ZrloWWQB9igrHIWyN5xsP3c8Yrl667wQk+3UJLeG6v3Xy1OmW0cUjXAO75ysiSAhMf3GPzjpzQBD4qEMenaZBpzRy6SjzeRMty0zNIQm9SWjjK4Gw42D72ec1y9dd43SfZp8lxDdWDt5ijTLmOKNrcDb84WNIwA+f7in5D14rkaACiiigAooooAtadYTanepawFFdgzFpG2qiqpZmJ9AoJ/Cr91oCxWU91Z6vp2opbgNOtt5qtGpYKGxIiZG5lHy56ik8LI0niO0VLp7ZxvZXjKhmIRjsG7jL42c8fNznpXWeJVvJPDl41xpGo6CibGCXNvbwreneo2/u4IixGd/8Q+U9ODQB53RRRQAUUUUAWtOsJtTvUtYCiuwZi0jbVRVUszE+gUE/hV+60BYrKe6s9X07UUtwGnW281WjUsFDYkRMjcyj5c9RSeFkaTxHaKl09s43srxlQzEIx2Ddxl8bOePm5z0rrPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/8AiHynpwaAPO6KKKANXT9E+2WX2251Ky0+1MhiSS6Mh8x1ALALGjNwGXJxjkc1BqemSaX
NErTQXEU0fmwzwMSkiZK5GQCOVYYIB4rq/BqXK6JLJb6fe61uuGVtOtYYZfKwq4lKyRS43ZwCFH3Dk9BWT40SVdXgaZ51ke3VjazhA9p8zDyiECqOm7AVfv8AQGgDnKKKKANXT9E+2WX2251Ky0+1MhiSS6Mh8x1ALALGjNwGXJxjkc1BqemSaXNErTQXEU0fmwzwMSkiZK5GQCOVYYIB4rq/BqXK6JLJb6fe61uuGVtOtYYZfKwq4lKyRS43ZwCFH3Dk9BWT40SVdXgaZ51ke3VjazhA9p8zDyiECqOm7AVfv9AaAOcooooAmtLWa+vYLS2TfPPIsUa5A3MxwBk+5rXfw2pimNprel3txDG0r20DSh9qgsxBeNUbABPDHgcZrO0lPN1mxj+1/Y91xGv2nOPJyw+fOR069e1eiaot9LpN+s2j6po8Yt5GOpT29tGlxhSdhdIEY78beJGyW7igDy+iiigAoorT0fSTqy36Rt/pENuJYY9yjzG8xFK89flZjgc/LQBRuLma6kEk7l3CJGCf7qKFUfgoA/Coq6nxL4dsdJtJzbi7jltbtbUtcSoy3WULGSNQAVAwvBLf6xeQeK5agAooooAKKKKALOnTW9vqdrNdwefbRzI80OceYgILL+IyK6P+2tDuri7N/ZxF0t51tLm2so4BIzRMqCSFTtGGIIZeQRzu4IKKAOTrT0O80yyup5NU01L+JoHWKN2cBJeCrHY6EjjB+boxPJAFFFABqOqWd7brHb6Dp1g4fcZbZ7hmIwflPmSuMc56Z4HPWsyiigAra0nUrCx0a+jubC1vJ5riDak6NkRhZd+11IKnJj6HnjIIFFFAFTUv7KbypdM+1x7s+ZBcbW8vpja4xuB56quMd6oUUUAFb3h5tDKNHrD+Xi8t5QfKZt8S7/MTK8ru3J27UUUAHiK+sb2C1MJtHu1kmEslpZi2jMWVEY2gAFuHOcZwwBJI4waKKALmlai2lanBfLbWt0YiT5N3CJYnBBBDKeD1/DqOa6PxF4stdU8P21lZW1tAXnlkni/s22jMQIi2hJI0U9UbJAUkEA5AySigDkKKKKALWmS2sGq2c19CZrSOdGniHV0DAsO3UZFdNLr+m3cV7Dc2ulJEttIga305Uku5vmEUi4UeUF+TKggEKeGJNFFAHH0UUUAFFFFABWhod1aWWrw3F9CJYFDggxCQKxUhW2MQG2sQ208HGD1oooA1JtW0a903U2nsY4tUeIRwS28CpFL+9Rt5QcRPsVh8vBBxgck83RRQAUUUUAFbem6ppFlo8kVzoNrf6h5+5ZrmSYL5RXBXEcqYIIyODncemBkooAz9RvIL24WS3021sECbTFbNKyk5PzHzHc55x1xwOOtVKKKACiiigAooooAK19G1DSrG1vhqGjw6hcOqG2MzyhEIPzAiORDgg9cnBUcck0UUAV9S1C2vvK+z6PZadszu+yvM3mZx18yR+mO2OpznjFCiigAooooAKKKKANPQLu1sdV+0XkUMsS284VJ4vMQyGFwmV/3yvPbrxin3k+i3lo80NrPYXoxiGJvNgk55xuO9MDJ5L59qKKAMmiiigAooooA0NGexTUf+JjxbNDMhbZv2u0TKjY9mKn8K3NUvdDGjTWVq9pMqW0AtjHZbJfPyPNdpSAxUgPwSR86gD5c0UUAcnRRRQB0Xh3xJDpVrLY3Wn2UsMr7xdNYQTzwnAHHmqQy8fd49iMnNbxVrA13xJfX0awrA80nk+VbRwZjLsVLBAMtg8k5PqTiiigDGooooA6LSNasNM0RkbT7O7u2mk3rc2yvuUoPLKucldjgkqMBg2DkdKviK6s7u7tntTbvKLcC6ktrcQRSS7mOVQBQBtKD7oyQTjnJKKAMeiiigCzp01vb6nazXcHn20cyPNDnHmICCy/iMiuj/ALa0O6uLs39nEXS3nW0ubayjgEjNEyoJIVO0YYghl5BHO7ggooA5OiiigArX0O/s9PTUpLq1trp3tQk
EVzEXUv5sZPIwVOwP8wIPvzRRQBFqD6PPbrNYRXVrcFsPbORJHjByyvww5wNpB6/eNZtFFABRRRQAUUUUAf/Z", - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAFoCAIAAAAElhK7AAAbC0lEQVR4Ae3dTaxdVdkAYG4vUOSvRQMUpI1JIRoSTUeYiDKhECKJRutPJCFGYiIJMSZqHDkyDMCBDgwTEhOVCQwalDAg0AjG3xAIN1ZhoBICJAoItJW/CqWujwN7n+909e5zetbad629nzvQdVbXfte7nnfv+3LvOZSTTvJFgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACB4gVWNjDD1dXVU0455eyzz77ooot27dp19dVXf/nLXw753HXXXffff//a2tqzzz576NChN99888iRIxuYp60JECBAYMACmwZ8NkcjQIAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqExgpbJ8E6W7srKyurq6efPmrVu3bt++fdeuXbt3796zZ08Iv3fv3n379q2trT3zzDMHDhw4fPjwkSNHjh49mmhnYQgQIECgLAF/xVpZ9ZANAQIECPQsoBH2DG47AgQIEChLQCMsqx6yIUCAAIGeBTTCnsFtR4AAAQJlCWiEZdVDNgQIECDQs4BG2DO47QgQIECgLAGNsKx6yIYAAQIEehbQCHsGtx0BAgQIlCWgEZZVD9kQIECAQM8CGmHP4LYjQIAAgbIENMKy6iEbAgQIEOhZQCPsGdx2BAgQIFCWgEZYVj1kQ4AAAQI9C2iEPYPbjgABAgTKEtAIy6qHbAgQIECgZwGNsGdw2xEgQIBAWQIaYVn1kA0BAgQI9CygEfYMbjsCBAgQKEtAIyyrHrIhQIAAgZ4FNMKewW1HgAABAmUJaIRl1UM2BAgQINCzgEbYM7jtCBAgQKAsAY2wrHrIhgABAgR6FtAIewa3HQEC
BAiUJaARllUP2RAgQIBAzwIaYc/gtiNAgACBsgQ0wrLqIRsCBAgQ6FlAI+wZ3HYECBAgUJaARlhWPWRDgAABAj0LaIQ9g9uOAAECBMoS0AjLqodsCBAgQKBnAY2wZ3DbESBAgEBZAhphWfWQDQECBAj0LKAR9gxuOwIECBAoS0AjLKsesiFAgACBngU0wp7BbUeAAAECZQlohGXVQzYECBAg0LOARtgzuO0IECBAoCwBjbCsesiGAAECBHoW0Ah7BrcdAQIECJQloBGWVQ/ZECBAgEDPAhphz+C2I0CAAIGyBDTCsuohGwIECBDoWUAj7BncdgQIECBQloBGWFY9ZEOAAAECPQtohD2D244AAQIEyhLQCMuqh2wIECBAoGcBjbBncNsRIECAQFkCGmFZ9ZANAQIECPQsoBH2DG47AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZBdYyb5DMRusrKxs2rTptNNO27Jly44dO3bt2rV79+49e/aEBPfu3btv3761tbWnn3764MGDb7zxxttvv3306NFicpcIAQIECOQS8Fes5ZIVlwABAgSqENAIqyiTJAkQIEAgl4BGmEtWXAIECBCoQkAjrKJMkiRAgACBXAIaYS5ZcQkQIECgCgGNsIoySZIAAQIEcglohLlkxSVAgACBKgQ0wirKJEkCBAgQyCWgEeaSFZcAAQIEqhDQCKsokyQJECBAIJeARphLVlwCBAgQqEJAI6yiTJIkQIAAgVwCGmEuWXEJECBAoAoBjbCKMkmSAAECBHIJaIS5ZMUlQIAAgSoENMIqyiRJAgQIEMgloBHmkhWXAAECBKoQ0AirKJMkCRAgQCCXgEaYS1ZcAgQIEKhCQCOsokySJECAAIFcAhphLllxCRAgQKAKAY2wijJJkgABAgRyCWiEuWTFJUCAAIEqBDTCKsokSQIECBDIJaAR5pIVlwABAgSqENAIqyiTJAkQIEAgl4BGmEtWXAIECBCoQkAjrKJMkiRAgACBXAIaYS5ZcQkQIECgCgGNsIoySZIAAQIEcglohLlkxSVAgACBKgQ0wirKJEkCBAgQyCWgEeaSFZcAAQIEqhDQCKsokyQJECBAIJeARphLVlwCBAgQqEJAI6yiTJIkQIAAgVwCGmEuWXEJECBAoAoBjbCKMkmSAAECBHIJaIS5ZMUlQIAAgSoENMIqyiRJAgQIEMgloBHmkhWXAAECBKoQ0AirKJMkCRAgQCCXgEaYS1ZcAgQIEKhCQCOsokySJECAAIFcAhphLllxCRAgQKAKAY2wijJJkgABAgRyCWiEuWTFJUCAAIEqBDTCKsokSQIECBDIJaAR5pIVlwABAgSqENAIqyiTJAkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQILCAwMoCawe0dGVlZXV1dfPmzVu3bt2+ffuuXbt27969Z8+ecMS9e/fu27dvbW3tmWeeOXDgwOHDh48cOXL06NEBnd5RCBAgQKAV8FestRZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIECLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIE
CLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIECLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEBi6wMnO+lZWVTZs2nXzyyZs3bz799NPDn7722muHDx9+66233n777aNHj86sP/ZliBAuP+WUU0477bQmwhtvvPHmm2+GIPNEODbmZGaS2Kmnnhoin3HGGWHy1VdfDZH/+9//TtI73oWd85OcQ9gtW7ZccMEFO3fuvPTSS8NVjz/++D/+8Y9//vOfBw8eDBvNn/+EMSAExve9731nnnlmiPbKK6+8/vrrATNQzIm5TuZBI3xNnGe2mGiHLcLXOhF6+KPgEL6a+yFU7ayzzgr7/uc//5nUrrkrlrkxjneQsHUgCruH+3BS2YsvvvhjH/vYZZddFi55+OGH//znP//973+f1Dfc5/Pf5MfbMdP85CCrq6vh5g+1DobhOO9///vDdi+99FK4OYNnuLXCg3DkyJFQ9ByYcx7tnbvy/27LyTeQs88+e+vWrR/4wAfC5S+++OKBAwcOHTo0+ZYyeQqy3qLBbYIWbryQxuTR/uhHPxpugMsvvzyk9Pvf/z7cBvv375885iG9cFtOGNc3nEQOt9Z0RcIxzz333BD2hRdeCIedrku4tUJp1o85LRwYQ+YTxpB8YDznnHNC/PPOOy8se/7550P8l19+OWCGhCffUialnw4yz7g5SPOdalKvsNG2bdtChH/961+T7QJO8x1s0eNMZ9J8Vwy38WSv888/P5Tmoosu2rFjR1j59NNPP/vss+GpfO655yb3zOT2nhxzOlTCsb9iLSGmUAQIECBQn4BGWF/NZEyAAAECCQU0woSYQhEgQIBAfQIaYX01kzEBAgQIJBTQCBNiCkWAAAEC9QlohPXVTMYECBAgkFBAI0yIKRQBAgQI1CegEdZXMxkTIECAQEIBjTAhplAECBAgUJ+ARlhfzWRMgAABAgkFNMKEmEIRIECAQH0CGmF9NZMxAQIECCQU0AgTYgpFgAABAvUJaIT11UzGBAgQIJBQQCNMiCkUAQIECNQnoBHWVzMZEyBAgEBCAY0wIaZQBAgQIFCfgEZYX81kTIAAAQIJBTTChJhCESBAgEB9AhphfTWTMQECBAgkFNAIE2IKRYAAAQL1CWiE9dVMxgQIECCQUEAjTIgpFAECBAjUJ6AR1lczGRMgQIBAQgGNMCGmUAQIECBQn4BGWF/NZEyAAAECCQU0woSYQhEgQIBAfQIaYX01kzEBAgQIJBTQCBNiCkWAAAEC9QlohPXVTMYECBAgkFBAI0yIKRQBAgQI1CegEdZXMxkTIECAQEIBjTAhplAECBAgUJ+ARlhfzWRMgAABAgkFNMKEmEIRIECAQH0CGmF9NZMxAQIECCQU0AgTYgpFgAABAvUJaIT11UzGBAgQIJBQQCNMiCkUAQIECNQnoBHWVzMZEyBAgEBCAY0wIaZQBAgQIFCfgEZYX81kTIAAAQIJBTTChJhCESBAgEB9AhphfTWTMQECBAgkFNAIE2IKRYAAAQL1CWiE9dVMxgQIECCQUEAjTIgpFAECBAjUJ6AR1lcz
GRMgQIBAQgGNMCGmUAQIECBQn4BGWF/NZEyAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQWETgc5/73CLLI2uXjxAJaooAAQIECPQj8NBDDy250fIRlkzA5QQIECBA4HgC3f89wt/97nfHu3jO+eUjzLmRZQQIECBAgAABAgQIECAwJoHcb0CmjZ82WrTOPWwR3XehyUKSLCSNhehmFg/gCDMn8pIAgYUFcr8BmTZ+2mhRrB62iO670GQhSRaSxkJ0M4sHcISZE3lJoH+B+HuEW7ZsueWWW/bv3//8O19hcOutt4bJ+fNbPsKce+V+AzJt/LTRokQ9bBHdd6HJQpIsJI2F6GYWD+AIMyfykkApAr/85S+/853vfPCDH9z0zlcYhJdhcv78lo8w/15WEiBAgACBxALRf8yMTh5v4+ji6OTxIpgnQGBgAqW9o5kpn0xhm5shd/xmozDoc6/pfXsex381+u9///vb3/72hRdeOPmJMAzCyzA5f3LLR5h/LysJEKhC4Fvf+lZReWbKJ1PYhi53/GajMOhzr+l9ex7HG+FXv/rVbdu23X///c+98xUG4WWYnD+55SPMv5eVBAhUIVDa74Qy5ZMpbFPi3PGbjcKgz72m9zUmQIAAAQIENlpg+c98Lh9hHYOswcO+aeOnjRZl6WGL6L4LTRaSZCFpLEQ3s3gAR5g5kZcEGoENeVcy/qvRn//85y+88MI111wTfiMavsIg/GsUYbLJtXOwfIR1tsgaPOybNn7aaFGWHraI7rvQZCFJFpLGQnQziwdwhJkTeUmgESjoXcno74Wjk032M4Po4ujkzIXzvIzGiU7OE+3YNdFQ0cljrz12JnphdPLYa+eciUaLTs4ZMMeyaD7RyRy7NzGjO0Ynm0tKG0SzjU6Wlrl8CHQK3HzzzZ1rki+I/0S4/Gc+l4+wzlGzBg/7po2fNlqUpYctovsuNFlIkoWksRDdzOIBHGHmRF4SaAS+//3vN+MNHoQ3IX74wx/+5S9/Cb8gDV9hEF6GyfnTWj7COntlDR72TRs/bbQoSw9bRPddaLKQJAtJYyG6mcUDOMLMibwkQIAAAQIECBAoTCD8I2fJf9fo8umt7502ftpo0cx72CK670KThSRZSBoL0c0srusIpWWbKZ9MYZvS547fbBQGfe41ve8GjuPvES7/sbTlI6yDkjV42Ddt/LTRoiw9bBHdd6HJQpIsJI2F6GYW13WE0rLNlE+msE3pc8dvNgqDPvea3re4cfQTaNHJ46UeXRydPF6EdeajcaKT6wRZ54+ioaKT6wRp/ih6YXSyuWTRQTRadHLRyAnXR/OJTibc9NhQ0R2jk8deW8hMNNvoZAkJRxOLTvaTbXTr6ORC+UQjRCcXCtssjoaKTjaXnPAgGjY6ecJblHZh/CfC5T+WtnyEdaSyBg/7po2fNlqUpYctovsuNFlIkoWksRDdzOK6jlBatpnyyRS2KX3u+M1GYdDnXtP7FjcOvyP2qdFlPjQ7XdHlMaejRcc9bBHdd6HJQpIsJI2F6GYW13WE0rLNlE+msE3pc8dvNgqDPvea3teYAAECBAgQKEkg/BOBT43u378//MVy4SsMbr311mByYiVaHrNz3x626Myhc0EhSTZ/b8XOnTt//etfh39NNvxvGHfmX86CQiTnBCkt20z5ZArbIOeO32wUBn3uNdm30Kdy+f++/PIRpgszM84aPOyVNn7aaDMUk5c9bBHdd6HJQpJ86KGHJmnfc889n//851dXVz/xiU/cd999C51lYxcXIjknQmnZZsonU9gGOXf8ZqMw6HOvyb6FPpXRDwhFJ6f5psfRxdHJ6avmHEfjRCfnDDizLBoqOjlzYfRl9MLoZPTyeSaj0aKT80TLtCaaT3QyUwKTsM0j
N711M5l161TBpzNvYkYnmz/dwEE0sehkP0lGt45OLpRPNEJ0cqGwzeJoqOhkc8kJD6Jho5MnvMXMhc0DOL1LMzmzOMdLnxqNqKb90FTaaJF0K/mUVw8OUZyZyR07dtx4442f/exnH3/88Ztuuin8RBjGJ5988syykl8WIjknUWnZZsonU9gGOXf8ZqMw6HOvyb4b/lTGG+Hy/3355SNMF2ZmnDV42Ctt/LTRZigmL3vYIrrvQpOFJPmNb3zj1Vdf/fCHPxz+N7w1uH379q985Stf+9rXFjrLxi4uRHJOhNKyzZRPprANcu74zUZh0Odek30H8FROAxp3C/ziF7/oXmQFAQI1C3jMa67ee7lfffXVTzzxRPjA5E9/+tNzzz13Mn3vvfe+9+cb/P/hJ/cf/OAH5513XqY80n6E6fXXXz906NCB977eeuutyTBh8tdee+1TTz31t7/97corrwxP4IsvvviHP/zhIx/5SMItlg9V+E21/AFFiArkflqjm64zmfbpbjbK/Zj3+Yz3/6iWdpO8W9a1tbXwbfSMM84IPyOHf6/84x//ePiDhd66PHjw4Hvf+f/f/zf3zTKD8F3+61//+mOPPXb77beH33EtEyp6bXPSJB8s/PSnPx0S/tKXvjTZqwke3frEJh999NELLrhg9+7d4dd94R2v8L7Xpz71qQceeODEomW6avmbKkliWe/MJBl2BqnrCLmf1k6umQXNA5jk6W6C537M+3zG+39US7tJ3i3rb3/7202b3n37MLyP8sgjj4T3UZobqKn9OoOf/exn3/zmN9dZsMwfTTJZWVn5zGc+85vf/OZXv/rVFVdcsUzAmWubk6b6CNNZZ51122233X333RdeeGETfGbTZV42Me+4444mTpBpxiUMlr+pkpwi652ZJMPOIHUdIffT2sk1s6B5WFI93U38rI95k3YPz3j/j2ppN8m7Nb3hhhvCt9EvfvGLk9dnnnnm3r17X3nllabknYOtW7cGzW3btnWuPIEFzT0xuTb8wBrS++Mf/3gCoaKXPPnkk5MPFoafOJsPFk4/NtGrOic/+clP/ulPf/rrX//auXLRBXfdddd3v/vdU089dXJh+AzkF77whQcffHDROFnXL39TJUkv652ZJMPOIHUdIffT2sk1syDT093skukx7/MZ7/9RLe0maap5UnhrcPpNpvDjVzmfrLvzzjvbRN8bXXzxxe8Nl/3/q6666vrrr//e97734x//+Ec/+tGHPvShsOMll1yybNyTTtq8efN11123fJyZCKeffnrI9pxzzpnM/+QnPwnvFIZPJM8s2/CXJd9UG44z1ARyP62LuuV7uptMcjzmPT/jPT+qpd0kTSkNCBAgQIAAAQIECBAgQGDoAv8DNIwUyHDTngMAAAAASUVORK5CYII=", - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Making a gel to show the PCR products\n", - "im = gel(\n", - " [\n", - " GeneRuler_1kb_plus,\n", - " [pcr_product_F1],\n", - " [pcr_product_F2],\n", - " [pcr_product_F3],\n", - " [pcr_product_BAC],\n", - " ]\n", - ")\n", - "im.rotate(90, expand=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: 
True\n", - "size: 23827\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 28\n", - "/molecule_type=DNA\n", - "Dseq(o23827)\n", - "GCAG..ccgc\n", - "CGTC..ggcg\n" - ] - } - ], - "source": [ - "# Performing the Gibson Assembly. Note that the assembly class parameters should be given as a list.\n", - "\n", - "assembled = Assembly([Dseqrecord(pcr_product_F1), Dseqrecord(pcr_product_F2), Dseqrecord(pcr_product_F3), Dseqrecord(pcr_product_BAC)])\n", - "assembled_circ = assembled.assemble_circular()\n", - "\n", - "# Printing out the Gibson Assembly product\n", - "print(assembled_circ[0])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example of a Gibson Assembly in pydna\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "This example showcases a workflow of modelling Gibson assembly to clone gene fragments into plasmids for synthetic biology. The biological example is sourced [here](https://www.nature.com/articles/nmeth.1318#MOESM319), from the original Gibson assembly paper. This example constructs a synthetic pCC1BAC plasmid by joining sequence fragments from Ruminiclostridium (Clostridium) cellulolyticum. The R. cellulolyticum fragments joined are termed F1, F2, and F3, as in the paper.\n", + "\n", + "Source files can be found alongside this notebook, if you would like to follow along. 
Annotations are made alongside the code to describe key steps.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing all necessary classes and methods\n", + "\n", + "from pydna.parsers import parse\n", + "from pydna.tm import tm_default\n", + "from pydna.amplify import pcr\n", + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.assembly import Assembly\n", + "from pydna.genbank import Genbank\n", + "from pydna.gel import gel\n", + "from pydna.ladders import GeneRuler_1kb_plus\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'molecule_type': 'DNA',\n", + " 'topology': 'circular',\n", + " 'data_file_division': 'BCT',\n", + " 'date': '25-AUG-2017',\n", + " 'accessions': ['CP001348', 'AAVC01000000', 'AAVC01000001-AAVC01000121'],\n", + " 'sequence_version': 1,\n", + " 'keywords': [''],\n", + " 'source': 'Ruminiclostridium cellulolyticum H10',\n", + " 'organism': 'Ruminiclostridium cellulolyticum H10',\n", + " 'taxonomy': ['Bacteria',\n", + " 'Bacillota',\n", + " 'Clostridia',\n", + " 'Eubacteriales',\n", + " 'Oscillospiraceae',\n", + " 'Ruminiclostridium'],\n", + " 'references': [Reference(title='Complete sequence of Clostridium cellulolyticum H10', ...),\n", + " Reference(title='Direct Submission', ...)],\n", + " 'comment': 'URL -- http://www.jgi.doe.gov\\nJGI Project ID: 4002584\\nSource DNA and bacteria available from Jizhong Zhou\\n(jzhou@rccc.ou.edu)\\nContacts: Jizhong Zhou (jzhou@rccc.ou.edu)\\n David Bruce 
(microbe@cuba.jgi-psf.org)\\nAnnotation done by JGI-ORNL and JGI-PGF\\nFinishing done by JGI-LANL\\nFinished microbial genomes have been curated to close all gaps with\\ngreater than 98% coverage of at least two independent clones. Each\\nbase pair has a minimum q (quality) value of 30 and the total error\\nrate is less than one per 50000.\\nThe JGI and collaborators endorse the principles for the\\ndistribution and use of large scale sequencing data adopted by the\\nlarger genome sequencing community and urge users of this data to\\nfollow them. it is our intention to publish the work of this\\nproject in a timely fashion and we welcome collaborative\\ninteraction on the project and analysis.\\n(http://www.genome.gov/page.cfm?pageID=10506376).'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reading the R. cellulolyticum genome from GenBank\n", + "gb = Genbank(\"example@example.com\")\n", + "genome = gb.nucleotide(\"CP001348.1\")\n", + "# Print the info of the genome\n", + "genome.annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'molecule_type': 'DNA',\n", + " 'topology': 'circular',\n", + " 'data_file_division': 'SYN',\n", + " 'date': '29-AUG-2024',\n", + " 'accessions': ['.'],\n", + " 'keywords': [''],\n", + " 'source': 'synthetic DNA construct',\n", + " 'organism': 'synthetic DNA construct',\n", + " 'taxonomy': [],\n", + " 'references': [Reference(title='Direct Submission', ...),\n", + " Reference(title='Direct Submission', ...),\n", + " Reference(title='Direct Submission', ...),\n", + " Reference(title='Direct Submission', ...)],\n", + " 'comment': 'SGRef: number: 1; type: \"Journal Article\"; journalName: \"Submitted\\n(23-AUG-2007) 726 Post Road, Madison, WI 53713, USA\"\\nSGRef: number: 2; type: \"Journal Article\"\\nSGRef: number: 3; type: \"Journal Article\"'}" + ] + }, + "execution_count": 
null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reading the plasmid\n", + "vector = parse(\"./pCC1BAC.gb\")[0]\n", + "vector.annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing pre-designed primers for the PylRS insert fragment. \n", + "\n", + "F1_For = \"GCAGCTTCAAGTCCTGCAAACAAGGTGTACCAGGATCGTT\"\n", + "F1_Rev = \"GATTTCAGTGTAGTTAGGGCCAGTTGAATTCAAACCTGCC\"\n", + "F2_For = \"GGCAGGTTTGAATTCAACTGGCCCTAACTACACTGAAATC\"\n", + "F2_Rev = \"CTTGGTGCCATCAGCATTGTTCTCTGTACCGCCCACTGTC\"\n", + "F3_For = \"GACAGTGGGCGGTACAGAGAACAATGCTGATGGCACCAAG\"\n", + "F3_Rev = \"CAGTTGAATAATCATGTGTTCCTGCGGCAAATGCAGTACC\"\n", + "BACF1_For = \"AACGATCCTGGTACACCTTGTTTGCAGGACTTGAAGCTGCgcggccgcgatcctctagagtcgacctg\"\n", + "BACF3_Rev = \"GGTACTGCATTTGCCGCAGGAACACATGATTATTCAACTGgcggccgccgggtaccgagctcgaattc\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5210\n", + "5384\n", + "5172\n", + "8221\n" + ] + } + ], + "source": [ + "# Getting the PCR products from the genome (might take a while since the genome is large)\n", + "\n", + "pcr_product_F1 = pcr(F1_For, F1_Rev, genome, limit=20)\n", + "pcr_product_F2 = pcr(F2_For, F2_Rev, genome, limit=20)\n", + "pcr_product_F3 = pcr(F3_For, F3_Rev, genome, limit=20)\n", + "pcr_product_BAC = pcr(BACF1_For, BACF3_Rev, vector, limit=20)\n", + "\n", + "# Printing out the PCR fragment sizes\n", + "print(len(pcr_product_F1))\n", + "print(len(pcr_product_F2))\n", + "print(len(pcr_product_F3))\n", + "print(len(pcr_product_BAC))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/jpeg": 
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAFoAlgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD5/ooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK6/wCH3hWx8WeIrfT7+W4jhlcKWgZQ36g1yFel/Bf/AJHey/66D+dAGH8QfCtj4T8RXGn2EtxJDE5UNOylv0ArkK9L+NH/ACO97/10P8680oAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK9L+C/wDyO9l/10H8680r0v4L/wDI72X/AF0H86AD40f8jve/9dD/ADrzSvS/jR/yO97/ANdD/OvNKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACvS/gv/wAjvZf9dB/OvNK9L+C//I72X/XQfzoAPjR/yO97/wBdD/OvNK9L+NH/ACO97/10P8680oAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK7n4Y+INL8PeKbW91W6+z26OCz+Wz4H0UE1w1FAHc/E7xBpfiHxTdXulXX2i3dyVfy2TI+jAGuGoooAKKKKACinxQyzuEijeRz0VFJJq7faDrOmI
j6hpN/aJINyNcWzxhh6jIGaAM+iiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8AIat/98V7X8b/APkCaN/16JXinhj/AJDVv/viva/jf/yBNG/69EoA+fD1NJSnqaSgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8hq3/3xXtfxv/5Amjf9eiV4p4Y/5DVv/viva/jf/wAgTRv+vRKAPnw9TSUp6mkoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooA2fDH/Iat/8AfFe1/G//AJAmjf8AXoleKeGP+Q1b/wC+K9r+N/8AyBNG/wCvRKAPnw9TSUp6mkoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAoooHWgCa1u57OdZrd9kinIOAf51t65448R+JIIYdW1H7RHAgSMeRGm1R2+VRXY/Dn4baN4v07ULjULm/ie2gaRBbyIoJHrlTXnusWMWn6jLbxM7IjEAuQTQBn0UUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUDrRQOtAH0H8EP8AkB6z/wBej14r4n/5Ddx/vmvavgh/yA9Z/wCvR68V8T/8hu4/3zQBjUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUDrRQOtAH0H8EP+QHrP/Xo9eK+J/wDkN3H++a9q+CH/ACA9Z/69HrxXxP8A8hu4/wB80AY1FFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFA60UDrQB9B/BD/kB6z/16PXivif/AJDdx/vmvSvhZ448OeG9K1OHVtR+zyT27JGPIkfcx7fKpry/XbuC81Saa3ffGzEg4I/nQBmUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFF
FABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAPihlncJFG8jnoqKSTV2+0HWdMRH1DSb+0SQbka4tnjDD1GQM1Z8Mf8hq3/3xXtfxv/5Amjf9eiUAfPdFKeppKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKANnwx/yGrf/AHxXtfxv/wCQJo3/AF6JXinhj/kNW/8Aviva/jf/AMgTRv8Ar0SgD58PU0lKeppKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKANnwx/yGrf8A3xXtfxv/AOQJo3/XoleKeGP+Q1b/AO+K9r+N/wDyBNG/69EoA+fD1NJSnqaSgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDZ8Mf8AIat/98V7X8b/APkCaN/16JXgNrdz2c6zW77JFOQcA/zrb1zxx4j8SQQw6tqP2iOBAkY8iNNqjt8qigDnz1NJRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAKBk4qaeFYgpUnkZ5qFfvCrV391PpQBUqWCMSyBWJx7VFViz/wBePrQAyeMRSFVJx71FVi8/15+tV6ACrVtbJMrFiwwM8VVq/Y/6t/8AdoApONrECm06T/WGm0AFdV4b8NWesW1xJcSzqY0LDy2A5/EGuVr0PwN/x43v/XE0AcJeQLb3LxoSQDjmq9XNU/4/pP8AeNU6AJ7aFZpQrEgH0ouYVhlKqSQPWpLD/Xr9aL//AF7fWgCpRRRQBLBGJJApJx7VLeWyW74QsfrTLT/XD61a1P8A1n4UAZ1FFFABRRRQAVLbxLLIFYkA+lRVZs/9ev1oAfe2kds+ELH6mqdaeq/638KzKACiiigB0ahnAPevR9C+Huk6n4XutTmuL1Z4QCqo6BT9crn9a85h/wBav1r3Lwj/AMk/1H/dFAHid9bpbXLxoWIBxzVar+rf8f0n1qhQAUUUUAFFFFABWppemw3r7ZWkA/2SP8Ky66Dw/wD638KAMm9tktpiiFiB61VrQ1T/AI+W+tZ9ABRRRQAUUUUAaWl6fFezKkjOAT/CR/hRqmnxWUzJGzkA/wARH+FWvD//AB9J9aPEH/H0/wBaAMOiiigAooooA6zwZ4XsvEWpxW13LcIjsATCyg/qDXT/ABJ+GujeDrjy9Pub+UbQf9IkRu3soqn8Lv8AkPW/+8K7746f8fo/3B/KgD57YYYihRlgKV/vmhPvCgDodK0C1voHeWSYFVyNrD/Cse9tUtpWRCxA9a63w7/x6Tf7hrmNV/4+W+tAGdSqMsAaSnR/fFA
HS6R4cs7+3eSWScFVyNjAf0rF1C0jtZmRCxA/vGuy8Nf8eUv+4a5TWf8Aj6f60AZdFFFAGhplhFeTKkjOAT/CRW/rXhWx062jkhluGLLk72U/yFZWg/8AH0n1rsfFP/HjD/uCgDzaRQrkCm0+b/WGmUAKo3MAe9dt4V8Hafrk6JczXSBjz5TKP5qa4qP/AFi/WvVvh5/x9RfUUAc94q8Hafoc7pbTXThTx5rKf5KK4lhtYgdq9X+If/H1L9TXlMn+sb60ANooooAKKKKAFX7wq1d/dT6VVX7wq1d/dT6UAVKsWf8Arx9ar1Ys/wDXj60AF5/rz9ar1YvP9efrVegAq/Y/6t/92qFX7H/Vv/u0AUpP9YabTpP9YabQAV6H4G/48b3/AK4mvPK9D8Df8eN7/wBcTQBxGqf8f0n+8ap1c1T/AI/pP941ToAt2H+vX60X/wDr2+tFh/r1+tF//r2+tAFSiiigCe0/1w+tWtT/ANZ+FVbT/XD61a1P/WfhQBnUUUUAFFFFABVmz/16/Wq1WbP/AF6/WgC1qv8ArfwrMrT1X/W/hWZQAUUUUAPh/wBav1r3Lwj/AMk/1H/dFeGw/wCtX617l4R/5J/qP+6KAPGdW/4/pPrVCr+rf8f0n1qhQAUUUUAFFFFABXQeH/8AW/hXP10Hh/8A1v4UAZ+qf8fLfWs+tDVP+PlvrWfQAUUUUAFFFFAG54f/AOPpPrR4g/4+n+tHh/8A4+k+tHiD/j6f60AYdFFFABRRRQB6P8Lv+Q9b/wC8K7746f8AH6P9wfyrgfhd/wAh63/3hXffHT/j9H+4P5UAfPj/AHzQn3hQ/wB80J94UAdt4d/49Jv9w1zGq/8AHy31rp/Dv/HpN/uGuY1X/j5b60AZ1Oj++KbTo/vigDvfDX/HlL/uGuU1n/j6f611fhr/AI8pf9w1yms/8fT/AFoAy6KKKANrQf8Aj6T612Pin/jxh/3BXHaD/wAfSfWux8U/8eMP+4KAPN5v9YaZT5v9YaZQA6P/AFi/WvVvh5/x9RfUV5TH/rF+terfDz/j6i+ooAPiH/x9S/U15TJ/rG+terfEP/j6l+prymT/AFjfWgBtFFFABRRRQAq/eFWrv7qfSqq/eFWrv7qfSgCpViz/ANePrVerFn/rx9aAC8/15+tV6sXn+vP1qvQAVfsf9W/+7VCr9j/q3/3aAKUn+sNNp0n+sNNoAK9D8Df8eN7/ANcTXnleh+Bv+PG9/wCuJoA4jVP+P6T/AHjVOrmqf8f0n+8ap0AW7D/Xr9aL/wD17fWiw/16/Wi//wBe31oAqUUUUAT2n+uH1q1qf+s/Cqtp/rh9atan/rPwoAzqKKKACiiigAqzZ/69frVarNn/AK9frQBa1X/W/hWZWnqv+t/CsygAooooAfD/AK1frXuXhH/kn+o/7orw2H/Wr9a9y8I/8k/1H/dFAHjOrf8AH9J9aoVf1b/j+k+tUKACiiigAooooAK6Dw//AK38K5+ug8P/AOt/CgDP1T/j5b61n1oap/x8t9az6ACiiigAooooA3PD/wDx9J9aPEH/AB9P9aPD/wDx9J9aPEH/AB9P9aAMOiiigAooooA9H+F3/Iet/wDeFd98dP8Aj9H+4P5VwPwu/wCQ9b/7wrvvjp/x+j/cH8qAPnx/vmhPvCh/vmhPvCgDtvDv/HpN/uGuY1X/AI+W+tdP4d/49Jv9w1zGq/8AHy31oAzqdH98U2nR/fFAHe+Gv+PKX/cNcprP/H0/1rq/DX/HlL/uGuU1n/j6f60AZdFFFAG1oP8Ax9J9a7HxT/x4w/7grjtB/wCPpPrXY+Kf+PGH/cFAHm83+sNMp83+sNMoAdH/AKxfrXq3w8/4+ovqK8pj/wBYv1r1b4ef8fUX1FAB8Q/+PqX6mvKZP9Y31r1b4h/8fUv1NeUyf6xvrQA2iiigAooooAVThhU9xKjqu05wPSq9FABU1s6xyhmOBUNFAE1y6ySllORUNFFABVu1njj
Rg7YJHpVSigBzkFyR0ptFFABXZeFNd03TbS6S7ufLZ4yqjYxyfwFcbRQBZv5UmuneNsqTwcVWoooAsWkqRTBnOB9KLuVJZiyHI+lV6KACiiigCW3dUlBY4FWL6eOZ8xtkfQ1SooAKKKKACiiigAqa2kWOUFjgVDRQBf1C4inkzG+4fQiqFFFABRRRQA+NgrgnpmvVfDvjLQLDwfeWFzf7LmRQETyZDn8QuK8nooAt6hNHPdO8bblJ4OMVUoooAKKKKACiiigArY0e+trWTM0m0Y/uk/yrHooAuX88c87NG24E+mKp0UUAFFFFABRRRQBq6PeW9rcK00m1QeuCf5UaxeW91cM0Mm5SeuCP51lUUAFFFFABRRRQB2vgLXtM0XV4Z9QufJjVgS3ls38ga6/4r+OfDniW6D6RqP2ldoGfIkTt/tKK8booAViCxIoU4YE0lFAHVaLrFhaW8iTz7GK4A2Mf5CsLUJ4p52aNtwJ9CKpUUAFOQgMCabRQB1+h63p1nbSJPcbGKkAbGP8AIVz+p3MVxOzRPuBPoRVCigAooooA1NJu4LWdWmfaAeuCf5V0uv8AiDS720iS3ut7KgBHlsP5iuGooAfIwZyR0plFFADkIDgnpXoHg3xNpGk3EbXt35Sg8ny3b+QNee0UAeheMvE2katcSNZXfmqTwfLdf5gV5+5BckdKbRQAUUUUAFFFFABUzJbiyidZWNyZHEke3hUAXac9ySX4/wBketP057WPU7R76J5bRZkM8afeePcNwHI5IzXoGraZcWWi3V3qttYvpzwuLdbfw/JayLIVIjPmmFAAG2k5dsgEc5oA82oore8JJBNq7wPAstzLCVtDJatcokuVOWjUEsNocfdbkg4OKAMe6S3SVRbStJH5cZLMuCHKAuPoG3DPfFQ12njGyk07ToYNUgtv7TeYNFLa6S9ioiAbcGDRRbiSUx8pxg881xdABRRVmx06+1S4+z6fZXF3PjPl28TSNj1wATQBXZWRyjqVZTggjBBpKvaxb6pb6nM2sW1zb3szGWRbmIxuxYkk4IHU5qjQAUUVZsdOvtUuPs+n2Vxdz4z5dvE0jY9cAE0AV2Vkco6lWU4IIwQaSr2sW+qW+pzNrFtc297MxlkW5iMbsWJJOCB1Oao0AFKVZQpKkBhlSR1GccfkaWOOSaVYokZ5HIVVUZLE9gK09V0rXrC2tTq2mX9rBGnlQG5tmjGCzPgEgZ5Zj+NAGVRRRQAUu1ggcqdpJAOOCR1/mPzpOprXvNG8Q2ekRNe6VqMGno7SJJNaukYZwoJ3EY5Cr37UAZFFFFABRRRQAUUU6OOSaVYokZ5HIVVUZLE9gKAEKsoUlSAwypI6jOOPyNJWrqula9YW1qdW0y/tYI08qA3Ns0YwWZ8AkDPLMfxrKoAKKKKAClVWdwiKWZjgADJJpK0tFsNZu75J9Esby5ubZ1lU2sDSmNgcg4APcd6AM2iprq0uLK5e3u7eW3nQ4eKVCjL9QeRUNABRRRQAUUUUAFFFWbHTr7VLj7Pp9lcXc+M+XbxNI2PXABNAFdlZHKOpVlOCCMEGkq9rFvqlvqczaxbXNvezMZZFuYjG7FiSTggdTmqNABRRRQAUUUUAFFFTTpbrFbGGVnkaMmZSuAj72AA9RtCnPufSgCGiiigAooooAKKKmnS3WK2MMrPI0ZMylcBH3sAB6jaFOfc+lAENFFFABRU1mlvJewJdytFbNIolkVdxRM8kDuQM8VDQAUUUUAFFTWaW8l7Al3K0Vs0iiWRV3FEzyQO5AzxUNABRRRQAUUVMyW4sonWVjcmRxJHt4VAF2nPckl+P9ketAENFFFABRRRQAUUrKyHDKVOAcEY4IyKSgAooooAKKKKAL+iPdR6/pz2MSy3i3URgjf7rybhtB5HBOO9dlJHpMEGoz6JcQ3OqS2swniOpyygRlG8wqGt4w5C7iP3jdM84ritJTzdZsY/tf2PdcRr9pzjycsPnzkdOvXtXomqLfS6
TfrNo+qaPGLeRjqU9vbRpcYUnYXSBGO/G3iRslu4oA8vroPCTMLy+RxGLKS0K3kjzmHy4t6chlDEHdsGArZzjBzXP10fgtJW1edoXnaRLdmFrAEL3fzKPKAcMp67sFW+50JoAueIEsbfw35GhyR3GmG7Rp5ftjzOkux9gw8MW0Eb+Qpzt68VyFdz4yS5bRIpLjT73Rdtwqrp11DDF5uVbMoWOKLO3GCSp++MHqK4agArqPCohk07U4NRaOLSXeHz5muWhZZAH2KCschbI3nGw/dzxiuXrrvBCT7dQkt4bq/dfLU6ZbRxSNcA7vnKyJICEx/cY/OOnNAEPioQx6dpkGnNHLpKPN5Ey3LTM0hCb1JaOMrgbDjYPvZ5zXL113jdJ9mnyXEN1YO3mKNMuY4o2twNvzhY0jAD5/uKfkPXiuRoAK6jwqIZNO1ODUWji0l3h8+ZrloWWQB9igrHIWyN5xsP3c8Yrl667wQk+3UJLeG6v3Xy1OmW0cUjXAO75ysiSAhMf3GPzjpzQBD4qEMenaZBpzRy6SjzeRMty0zNIQm9SWjjK4Gw42D72ec1y9dd43SfZp8lxDdWDt5ijTLmOKNrcDb84WNIwA+f7in5D14rkaANjwtJcR+IYGtkjd9kofzJDGojMbCRtw5XCbjkdMdD0rf1KLS7Xw9qa+Hp4rlJUT7bm+klaNBIhDKrQQj7+wZ+bGe2c1geFkaTxHaKl09s43srxlQzEIx2Ddxl8bOePm5z0rrPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/8AiHynpwaAPO6KKKAL+iPdR6/pz2MSy3i3URgjf7rybhtB5HBOO9dlJHpMEGoz6JcQ3OqS2swniOpyygRlG8wqGt4w5C7iP3jdM84ritJTzdZsY/tf2PdcRr9pzjycsPnzkdOvXtXomqLfS6TfrNo+qaPGLeRjqU9vbRpcYUnYXSBGO/G3iRslu4oA8vooooAKKKKACtjwtJcR+IYGtkjd9kofzJDGojMbCRtw5XCbjkdMdD0rHrY8LI0niO0VLp7ZxvZXjKhmIRjsG7jL42c8fNznpQBv6lFpdr4e1NfD08VykqJ9tzfSStGgkQhlVoIR9/YM/NjPbOa4ivRPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/+IfKenBrzugAooooAK6/w+ljceG/I1ySO30wXbtBL9seF3l2JvGEhl3ADZyVGN3XmuQruPB1rey6fAbeVJrZ7m4NzayQQThdkSMhVJVYKzklN2McD0oAx/FrMbyxRBGbKO0C2ciTmbzIt78lmVSTu3jBVcYxgYrn66Hxe5m1Czud8wFxaJKtvMULWylmAT5FVQCAHACrw447nnqACiiigAooooAK6jwqIZNO1ODUWji0l3h8+ZrloWWQB9igrHIWyN5xsP3c8Yrl667wQk+3UJLeG6v3Xy1OmW0cUjXAO75ysiSAhMf3GPzjpzQBD4qEMenaZBpzRy6SjzeRMty0zNIQm9SWjjK4Gw42D72ec1y9dd43SfZp8lxDdWDt5ijTLmOKNrcDb84WNIwA+f7in5D14rkaACiiigAooooAtadYTanepawFFdgzFpG2qiqpZmJ9AoJ/Cr91oCxWU91Z6vp2opbgNOtt5qtGpYKGxIiZG5lHy56ik8LI0niO0VLp7ZxvZXjKhmIRjsG7jL42c8fNznpXWeJVvJPDl41xpGo6CibGCXNvbwreneo2/u4IixGd/8Q+U9ODQB53RRRQAUUUUAWtOsJtTvUtYCiuwZi0jbVRVUszE+gUE/hV+60BYrKe6s9X07UUtwGnW281WjUsFDYkRMjcyj5c9RSeFkaTxHaKl09s43srxlQzEIx2Ddxl8bOePm5z0rrPEq3knhy8a40jUdBRNjBLm3t4VvTvUbf3cERYjO/8AiHynpwaAPO6KKKANXT9E+2WX2251Ky0+1MhiSS6Mh8x1ALALGjNwGXJxjkc1BqemSaX
NErTQXEU0fmwzwMSkiZK5GQCOVYYIB4rq/BqXK6JLJb6fe61uuGVtOtYYZfKwq4lKyRS43ZwCFH3Dk9BWT40SVdXgaZ51ke3VjazhA9p8zDyiECqOm7AVfv8AQGgDnKKKKANXT9E+2WX2251Ky0+1MhiSS6Mh8x1ALALGjNwGXJxjkc1BqemSaXNErTQXEU0fmwzwMSkiZK5GQCOVYYIB4rq/BqXK6JLJb6fe61uuGVtOtYYZfKwq4lKyRS43ZwCFH3Dk9BWT40SVdXgaZ51ke3VjazhA9p8zDyiECqOm7AVfv9AaAOcooooAmtLWa+vYLS2TfPPIsUa5A3MxwBk+5rXfw2pimNprel3txDG0r20DSh9qgsxBeNUbABPDHgcZrO0lPN1mxj+1/Y91xGv2nOPJyw+fOR069e1eiaot9LpN+s2j6po8Yt5GOpT29tGlxhSdhdIEY78beJGyW7igDy+iiigAoorT0fSTqy36Rt/pENuJYY9yjzG8xFK89flZjgc/LQBRuLma6kEk7l3CJGCf7qKFUfgoA/Coq6nxL4dsdJtJzbi7jltbtbUtcSoy3WULGSNQAVAwvBLf6xeQeK5agAooooAKKKKALOnTW9vqdrNdwefbRzI80OceYgILL+IyK6P+2tDuri7N/ZxF0t51tLm2so4BIzRMqCSFTtGGIIZeQRzu4IKKAOTrT0O80yyup5NU01L+JoHWKN2cBJeCrHY6EjjB+boxPJAFFFABqOqWd7brHb6Dp1g4fcZbZ7hmIwflPmSuMc56Z4HPWsyiigAra0nUrCx0a+jubC1vJ5riDak6NkRhZd+11IKnJj6HnjIIFFFAFTUv7KbypdM+1x7s+ZBcbW8vpja4xuB56quMd6oUUUAFb3h5tDKNHrD+Xi8t5QfKZt8S7/MTK8ru3J27UUUAHiK+sb2C1MJtHu1kmEslpZi2jMWVEY2gAFuHOcZwwBJI4waKKALmlai2lanBfLbWt0YiT5N3CJYnBBBDKeD1/DqOa6PxF4stdU8P21lZW1tAXnlkni/s22jMQIi2hJI0U9UbJAUkEA5AySigDkKKKKALWmS2sGq2c19CZrSOdGniHV0DAsO3UZFdNLr+m3cV7Dc2ulJEttIga305Uku5vmEUi4UeUF+TKggEKeGJNFFAHH0UUUAFFFFABWhod1aWWrw3F9CJYFDggxCQKxUhW2MQG2sQ208HGD1oooA1JtW0a903U2nsY4tUeIRwS28CpFL+9Rt5QcRPsVh8vBBxgck83RRQAUUUUAFbem6ppFlo8kVzoNrf6h5+5ZrmSYL5RXBXEcqYIIyODncemBkooAz9RvIL24WS3021sECbTFbNKyk5PzHzHc55x1xwOOtVKKKACiiigAooooAK19G1DSrG1vhqGjw6hcOqG2MzyhEIPzAiORDgg9cnBUcck0UUAV9S1C2vvK+z6PZadszu+yvM3mZx18yR+mO2OpznjFCiigAooooAKKKKANPQLu1sdV+0XkUMsS284VJ4vMQyGFwmV/3yvPbrxin3k+i3lo80NrPYXoxiGJvNgk55xuO9MDJ5L59qKKAMmiiigAooooA0NGexTUf+JjxbNDMhbZv2u0TKjY9mKn8K3NUvdDGjTWVq9pMqW0AtjHZbJfPyPNdpSAxUgPwSR86gD5c0UUAcnRRRQB0Xh3xJDpVrLY3Wn2UsMr7xdNYQTzwnAHHmqQy8fd49iMnNbxVrA13xJfX0awrA80nk+VbRwZjLsVLBAMtg8k5PqTiiigDGooooA6LSNasNM0RkbT7O7u2mk3rc2yvuUoPLKucldjgkqMBg2DkdKviK6s7u7tntTbvKLcC6ktrcQRSS7mOVQBQBtKD7oyQTjnJKKAMeiiigCzp01vb6nazXcHn20cyPNDnHmICCy/iMiuj/ALa0O6uLs39nEXS3nW0ubayjgEjNEyoJIVO0YYghl5BHO7ggooA5OiiigArX0O/s9PTUpLq1trp3tQk
EVzEXUv5sZPIwVOwP8wIPvzRRQBFqD6PPbrNYRXVrcFsPbORJHjByyvww5wNpB6/eNZtFFABRRRQAUUUUAf/Z", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAFoCAIAAAAElhK7AAAbC0lEQVR4Ae3dTaxdVdkAYG4vUOSvRQMUpI1JIRoSTUeYiDKhECKJRutPJCFGYiIJMSZqHDkyDMCBDgwTEhOVCQwalDAg0AjG3xAIN1ZhoBICJAoItJW/CqWujwN7n+909e5zetbad629nzvQdVbXfte7nnfv+3LvOZSTTvJFgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACB4gVWNjDD1dXVU0455eyzz77ooot27dp19dVXf/nLXw753HXXXffff//a2tqzzz576NChN99888iRIxuYp60JECBAYMACmwZ8NkcjQIAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIDBkAY1wyNV1NgIECBDoFNAIO4ksIECAAIEhC2iEQ66usxEgQIBAp4BG2ElkAQECBAgMWUAjHHJ1nY0AAQIEOgU0wk4iCwgQIEBgyAIa4ZCr62wECBAg0CmgEXYSWUCAAAECQxbQCIdcXWcjQIAAgU4BjbCTyAICBAgQGLKARjjk6jobAQIECHQKaISdRBYQIECAwJAFNMIhV9fZCBAgQKBTQCPsJLKAAAECBIYsoBEOubrORoAAAQKdAhphJ5EFBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqExgpbJ8E6W7srKyurq6efPmrVu3bt++fdeuXbt3796zZ08Iv3fv3n379q2trT3zzDMHDhw4fPjwkSNHjh49mmhnYQgQIECgLAF/xVpZ9ZANAQIECPQsoBH2DG47AgQIEChLQCMsqx6yIUCAAIGeBTTCnsFtR4AAAQJlCWiEZdVDNgQIECDQs4BG2DO47QgQIECgLAGNsKx6yIYAAQIEehbQCHsGtx0BAgQIlCWgEZZVD9kQIECAQM8CGmHP4LYjQIAAgbIENMKy6iEbAgQIEOhZQCPsGdx2BAgQIFCWgEZYVj1kQ4AAAQI9C2iEPYPbjgABAgTKEtAIy6qHbAgQIECgZwGNsGdw2xEgQIBAWQIaYVn1kA0BAgQI9CygEfYMbjsCBAgQKEtAIyyrHrIhQIAAgZ4FNMKewW1HgAABAmUJaIRl1UM2BAgQINCzgEbYM7jtCBAgQKAsAY2wrHrIhgABAgR6FtAIewa3HQEC
BAiUJaARllUP2RAgQIBAzwIaYc/gtiNAgACBsgQ0wrLqIRsCBAgQ6FlAI+wZ3HYECBAgUJaARlhWPWRDgAABAj0LaIQ9g9uOAAECBMoS0AjLqodsCBAgQKBnAY2wZ3DbESBAgEBZAhphWfWQDQECBAj0LKAR9gxuOwIECBAoS0AjLKsesiFAgACBngU0wp7BbUeAAAECZQlohGXVQzYECBAg0LOARtgzuO0IECBAoCwBjbCsesiGAAECBHoW0Ah7BrcdAQIECJQloBGWVQ/ZECBAgEDPAhphz+C2I0CAAIGyBDTCsuohGwIECBDoWUAj7BncdgQIECBQloBGWFY9ZEOAAAECPQtohD2D244AAQIEyhLQCMuqh2wIECBAoGcBjbBncNsRIECAQFkCGmFZ9ZANAQIECPQsoBH2DG47AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZBdYyb5DMRusrKxs2rTptNNO27Jly44dO3bt2rV79+49e/aEBPfu3btv3761tbWnn3764MGDb7zxxttvv3306NFicpcIAQIECOQS8Fes5ZIVlwABAgSqENAIqyiTJAkQIEAgl4BGmEtWXAIECBCoQkAjrKJMkiRAgACBXAIaYS5ZcQkQIECgCgGNsIoySZIAAQIEcglohLlkxSVAgACBKgQ0wirKJEkCBAgQyCWgEeaSFZcAAQIEqhDQCKsokyQJECBAIJeARphLVlwCBAgQqEJAI6yiTJIkQIAAgVwCGmEuWXEJECBAoAoBjbCKMkmSAAECBHIJaIS5ZMUlQIAAgSoENMIqyiRJAgQIEMgloBHmkhWXAAECBKoQ0AirKJMkCRAgQCCXgEaYS1ZcAgQIEKhCQCOsokySJECAAIFcAhphLllxCRAgQKAKAY2wijJJkgABAgRyCWiEuWTFJUCAAIEqBDTCKsokSQIECBDIJaAR5pIVlwABAgSqENAIqyiTJAkQIEAgl4BGmEtWXAIECBCoQkAjrKJMkiRAgACBXAIaYS5ZcQkQIECgCgGNsIoySZIAAQIEcglohLlkxSVAgACBKgQ0wirKJEkCBAgQyCWgEeaSFZcAAQIEqhDQCKsokyQJECBAIJeARphLVlwCBAgQqEJAI6yiTJIkQIAAgVwCGmEuWXEJECBAoAoBjbCKMkmSAAECBHIJaIS5ZMUlQIAAgSoENMIqyiRJAgQIEMgloBHmkhWXAAECBKoQ0AirKJMkCRAgQCCXgEaYS1ZcAgQIEKhCQCOsokySJECAAIFcAhphLllxCRAgQKAKAY2wijJJkgABAgRyCWiEuWTFJUCAAIEqBDTCKsokSQIECBDIJaAR5pIVlwABAgSqENAIqyiTJAkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQILCAwMoCawe0dGVlZXV1dfPmzVu3bt2+ffuuXbt27969Z8+ecMS9e/fu27dvbW3tmWeeOXDgwOHDh48cOXL06NEBnd5RCBAgQKAV8FestRZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIECLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIE
CLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBBoBTTC1sKIAAECBEYooBGOsOiOTIAAAQKtgEbYWhgRIECAwAgFNMIRFt2RCRAgQKAV0AhbCyMCBAgQGKGARjjCojsyAQIECLQCGmFrYUSAAAECIxTQCEdYdEcmQIAAgVZAI2wtjAgQIEBghAIa4QiL7sgECBAg0ApohK2FEQECBAiMUEAjHGHRHZkAAQIEWgGNsLUwIkCAAIERCmiEIyy6IxMgQIBAK6ARthZGBAgQIDBCAY1whEV3ZAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEBi6wMnO+lZWVTZs2nXzyyZs3bz799NPDn7722muHDx9+66233n777aNHj86sP/ZliBAuP+WUU0477bQmwhtvvPHmm2+GIPNEODbmZGaS2Kmnnhoin3HGGWHy1VdfDZH/+9//TtI73oWd85OcQ9gtW7ZccMEFO3fuvPTSS8NVjz/++D/+8Y9//vOfBw8eDBvNn/+EMSAExve9731nnnlmiPbKK6+8/vrrATNQzIm5TuZBI3xNnGe2mGiHLcLXOhF6+KPgEL6a+yFU7ayzzgr7/uc//5nUrrkrlrkxjneQsHUgCruH+3BS2YsvvvhjH/vYZZddFi55+OGH//znP//973+f1Dfc5/Pf5MfbMdP85CCrq6vh5g+1DobhOO9///vDdi+99FK4OYNnuLXCg3DkyJFQ9ByYcx7tnbvy/27LyTeQs88+e+vWrR/4wAfC5S+++OKBAwcOHTo0+ZYyeQqy3qLBbYIWbryQxuTR/uhHPxpugMsvvzyk9Pvf/z7cBvv375885iG9cFtOGNc3nEQOt9Z0RcIxzz333BD2hRdeCIedrku4tUJp1o85LRwYQ+YTxpB8YDznnHNC/PPOOy8se/7550P8l19+OWCGhCffUialnw4yz7g5SPOdalKvsNG2bdtChH/961+T7QJO8x1s0eNMZ9J8Vwy38WSv888/P5Tmoosu2rFjR1j59NNPP/vss+GpfO655yb3zOT2nhxzOlTCsb9iLSGmUAQIECBQn4BGWF/NZEyAAAECCQU0woSYQhEgQIBAfQIaYX01kzEBAgQIJBTQCBNiCkWAAAEC9QlohPXVTMYECBAgkFBAI0yIKRQBAgQI1CegEdZXMxkTIECAQEIBjTAhplAECBAgUJ+ARlhfzWRMgAABAgkFNMKEmEIRIECAQH0CGmF9NZMxAQIECCQU0AgTYgpFgAABAvUJaIT11UzGBAgQIJBQQCNMiCkUAQIECNQnoBHWVzMZEyBAgEBCAY0wIaZQBAgQIFCfgEZYX81kTIAAAQIJBTTChJhCESBAgEB9AhphfTWTMQECBAgkFNAIE2IKRYAAAQL1CWiE9dVMxgQIECCQUEAjTIgpFAECBAjUJ6AR1lczGRMgQIBAQgGNMCGmUAQIECBQn4BGWF/NZEyAAAECCQU0woSYQhEgQIBAfQIaYX01kzEBAgQIJBTQCBNiCkWAAAEC9QlohPXVTMYECBAgkFBAI0yIKRQBAgQI1CegEdZXMxkTIECAQEIBjTAhplAECBAgUJ+ARlhfzWRMgAABAgkFNMKEmEIRIECAQH0CGmF9NZMxAQIECCQU0AgTYgpFgAABAvUJaIT11UzGBAgQIJBQQCNMiCkUAQIECNQnoBHWVzMZEyBAgEBCAY0wIaZQBAgQIFCfgEZYX81kTIAAAQIJBTTChJhCESBAgEB9AhphfTWTMQECBAgkFNAIE2IKRYAAAQL1CWiE9dVMxgQIECCQUEAjTIgpFAECBAjUJ6AR1lcz
GRMgQIBAQgGNMCGmUAQIECBQn4BGWF/NZEyAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQWETgc5/73CLLI2uXjxAJaooAAQIECPQj8NBDDy250fIRlkzA5QQIECBA4HgC3f89wt/97nfHu3jO+eUjzLmRZQQIECBAgAABAgQIECAwJoHcb0CmjZ82WrTOPWwR3XehyUKSLCSNhehmFg/gCDMn8pIAgYUFcr8BmTZ+2mhRrB62iO670GQhSRaSxkJ0M4sHcISZE3lJoH+B+HuEW7ZsueWWW/bv3//8O19hcOutt4bJ+fNbPsKce+V+AzJt/LTRokQ9bBHdd6HJQpIsJI2F6GYWD+AIMyfykkApAr/85S+/853vfPCDH9z0zlcYhJdhcv78lo8w/15WEiBAgACBxALRf8yMTh5v4+ji6OTxIpgnQGBgAqW9o5kpn0xhm5shd/xmozDoc6/pfXsex381+u9///vb3/72hRdeOPmJMAzCyzA5f3LLR5h/LysJEKhC4Fvf+lZReWbKJ1PYhi53/GajMOhzr+l9ex7HG+FXv/rVbdu23X///c+98xUG4WWYnD+55SPMv5eVBAhUIVDa74Qy5ZMpbFPi3PGbjcKgz72m9zUmQIAAAQIENlpg+c98Lh9hHYOswcO+aeOnjRZl6WGL6L4LTRaSZCFpLEQ3s3gAR5g5kZcEGoENeVcy/qvRn//85y+88MI111wTfiMavsIg/GsUYbLJtXOwfIR1tsgaPOybNn7aaFGWHraI7rvQZCFJFpLGQnQziwdwhJkTeUmgESjoXcno74Wjk032M4Po4ujkzIXzvIzGiU7OE+3YNdFQ0cljrz12JnphdPLYa+eciUaLTs4ZMMeyaD7RyRy7NzGjO0Ynm0tKG0SzjU6Wlrl8CHQK3HzzzZ1rki+I/0S4/Gc+l4+wzlGzBg/7po2fNlqUpYctovsuNFlIkoWksRDdzOIBHGHmRF4SaAS+//3vN+MNHoQ3IX74wx/+5S9/Cb8gDV9hEF6GyfnTWj7COntlDR72TRs/bbQoSw9bRPddaLKQJAtJYyG6mcUDOMLMibwkQIAAAQIECBAoTCD8I2fJf9fo8umt7502ftpo0cx72CK670KThSRZSBoL0c0srusIpWWbKZ9MYZvS547fbBQGfe41ve8GjuPvES7/sbTlI6yDkjV42Ddt/LTRoiw9bBHdd6HJQpIsJI2F6GYW13WE0rLNlE+msE3pc8dvNgqDPvea3re4cfQTaNHJ46UeXRydPF6EdeajcaKT6wRZ54+ioaKT6wRp/ih6YXSyuWTRQTRadHLRyAnXR/OJTibc9NhQ0R2jk8deW8hMNNvoZAkJRxOLTvaTbXTr6ORC+UQjRCcXCtssjoaKTjaXnPAgGjY6ecJblHZh/CfC5T+WtnyEdaSyBg/7po2fNlqUpYctovsuNFlIkoWksRDdzOK6jlBatpnyyRS2KX3u+M1GYdDnXtP7FjcOvyP2qdFlPjQ7XdHlMaejRcc9bBHdd6HJQpIsJI2F6GYW13WE0rLNlE+msE3pc8dvNgqDPvea3teYAAECBAgQKEkg/BOBT43u378//MVy4SsMbr311mByYiVaHrNz3x626Myhc0EhSTZ/b8XOnTt//etfh39NNvxvGHfmX86CQiTnBCkt20z5ZArbIOeO32wUBn3uNdm30Kdy+f++/PIRpgszM84aPOyVNn7aaDMUk5c9bBHdd6HJQpJ86KGHJmnfc889n//851dXVz/xiU/cd999C51lYxcXIjknQmnZZsonU9gGOXf8ZqMw6HOvyb6FPpXRDwhFJ6f5psfRxdHJ6avmHEfjRCfnDDizLBoqOjlzYfRl9MLoZPTyeSaj0aKT80TLtCaaT3QyUwKTsM0j
N711M5l161TBpzNvYkYnmz/dwEE0sehkP0lGt45OLpRPNEJ0cqGwzeJoqOhkc8kJD6Jho5MnvMXMhc0DOL1LMzmzOMdLnxqNqKb90FTaaJF0K/mUVw8OUZyZyR07dtx4442f/exnH3/88Ztuuin8RBjGJ5988syykl8WIjknUWnZZsonU9gGOXf8ZqMw6HOvyb4b/lTGG+Hy/3355SNMF2ZmnDV42Ctt/LTRZigmL3vYIrrvQpOFJPmNb3zj1Vdf/fCHPxz+N7w1uH379q985Stf+9rXFjrLxi4uRHJOhNKyzZRPprANcu74zUZh0Odek30H8FROAxp3C/ziF7/oXmQFAQI1C3jMa67ee7lfffXVTzzxRPjA5E9/+tNzzz13Mn3vvfe+9+cb/P/hJ/cf/OAH5513XqY80n6E6fXXXz906NCB977eeuutyTBh8tdee+1TTz31t7/97corrwxP4IsvvviHP/zhIx/5SMItlg9V+E21/AFFiArkflqjm64zmfbpbjbK/Zj3+Yz3/6iWdpO8W9a1tbXwbfSMM84IPyOHf6/84x//ePiDhd66PHjw4Hvf+f/f/zf3zTKD8F3+61//+mOPPXb77beH33EtEyp6bXPSJB8s/PSnPx0S/tKXvjTZqwke3frEJh999NELLrhg9+7d4dd94R2v8L7Xpz71qQceeODEomW6avmbKkliWe/MJBl2BqnrCLmf1k6umQXNA5jk6W6C537M+3zG+39US7tJ3i3rb3/7202b3n37MLyP8sgjj4T3UZobqKn9OoOf/exn3/zmN9dZsMwfTTJZWVn5zGc+85vf/OZXv/rVFVdcsUzAmWubk6b6CNNZZ51122233X333RdeeGETfGbTZV42Me+4444mTpBpxiUMlr+pkpwi652ZJMPOIHUdIffT2sk1s6B5WFI93U38rI95k3YPz3j/j2ppN8m7Nb3hhhvCt9EvfvGLk9dnnnnm3r17X3nllabknYOtW7cGzW3btnWuPIEFzT0xuTb8wBrS++Mf/3gCoaKXPPnkk5MPFoafOJsPFk4/NtGrOic/+clP/ulPf/rrX//auXLRBXfdddd3v/vdU089dXJh+AzkF77whQcffHDROFnXL39TJUkv652ZJMPOIHUdIffT2sk1syDT093skukx7/MZ7/9RLe0maap5UnhrcPpNpvDjVzmfrLvzzjvbRN8bXXzxxe8Nl/3/q6666vrrr//e97734x//+Ec/+tGHPvShsOMll1yybNyTTtq8efN11123fJyZCKeffnrI9pxzzpnM/+QnPwnvFIZPJM8s2/CXJd9UG44z1ARyP62LuuV7uptMcjzmPT/jPT+qpd0kTSkNCBAgQIAAAQIECBAgQGDoAv8DNIwUyHDTngMAAAAASUVORK5CYII=", + "text/plain": [ + "" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Making a gel to show the PCR products\n", + "im = gel(\n", + " [\n", + " GeneRuler_1kb_plus,\n", + " [pcr_product_F1],\n", + " [pcr_product_F2],\n", + " [pcr_product_F3],\n", + " [pcr_product_BAC],\n", + " ]\n", + ")\n", + "im.rotate(90, expand=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: 
True\n", + "size: 23827\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 28\n", + "/molecule_type=DNA\n", + "Dseq(o23827)\n", + "GCAG..ccgc\n", + "CGTC..ggcg\n" + ] + } + ], + "source": [ + "# Performing the Gibson Assembly. Note that the assembly class parameters should be given as a list.\n", + "\n", + "assembled = Assembly([Dseqrecord(pcr_product_F1), Dseqrecord(pcr_product_F2), Dseqrecord(pcr_product_F3), Dseqrecord(pcr_product_BAC)])\n", + "assembled_circ = assembled.assemble_circular()\n", + "\n", + "# Printing out the Gibson Assembly product\n", + "print(assembled_circ[0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/Example_Restriction.ipynb b/docs/notebooks/Example_Restriction.ipynb index 4c83bcfd..26dbf6d3 100755 --- a/docs/notebooks/Example_Restriction.ipynb +++ b/docs/notebooks/Example_Restriction.ipynb @@ -1,1014 +1,1014 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example of a Plasmid Restriction/Ligation Cloning\n", - "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", - "\n", - "This example showcases a workflow of modelling molecular cloning with restriction enzymes, PCR, and ligases, to clone gene fragments into plasmids. This example constructs a synthetic plasmid by cloning the ase1 gene, which encodes a microtubule associated protein responsible for mitotic spindle assembly, into the pFA6a-kanMX6 cloning vector:\n", - "\n", - "1. The ase1 gene fragment is first cloned from a portion of the _S. 
pombe_ genome through PCR:\n", - "2. The pFA6a-kanMX6 cloning vector is then cleaved with AscI and SalI. The ase1 gene fragment is also cleaved with SalI and AscI\n", - "3. The fragment is ligated with the linearized pFA6a-kanMX6 vector.\n", - "\n", - "Source files can be found alongside this notebook, if you would like to follow along. Annotations are made alongside the code to describe key steps." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Importing all necessary classes and methods\n", - "\n", - "from pydna.parsers import parse\n", - "from pydna.tm import tm_default\n", - "from pydna.amplify import pcr\n", - "from pydna.dseqrecord import Dseqrecord\n", - "from Bio.Restriction import SalI, AscI" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS pFA6a-kanMX6 3938 bp ds-DNA circular SYN 16-JUN-2022\n", - "DEFINITION synthetic circular DNA.\n", - "ACCESSION .\n", - "VERSION .\n", - "KEYWORDS pFA6a-kanMX6.\n", - "SOURCE synthetic DNA construct\n", - " ORGANISM synthetic DNA construct\n", - " .\n", - "REFERENCE 1 (bases 1 to 3938)\n", - " AUTHORS Bahler J, Wu JQ, Longtine MS, Shah NG, McKenzie A 3rd, Steever AB,\n", - " Wach A, Philippsen P, Pringle JR\n", - " TITLE Heterologous modules for efficient and versatile PCR-based gene\n", - " targeting in Schizosaccharomyces pombe.\n", - " JOURNAL Yeast. 
1998 Jul;14(10):943-51.\n", - " PUBMED 9717240\n", - "REFERENCE 2 (bases 1 to 3938)\n", - " AUTHORS .\n", - " TITLE Direct Submission\n", - " JOURNAL Exported Jun 16, 2022 from SnapGene Server 1.1.58\n", - " http://www.snapgene.com\n", - "FEATURES Location/Qualifiers\n", - " source 1..3938\n", - " /organism=\"synthetic DNA construct\"\n", - " /mol_type=\"other DNA\"\n", - " primer_bind complement(35..52)\n", - " /label=\"L4440\"\n", - " /note=\"L4440 vector, forward primer\"\n", - " rep_origin complement(206..794)\n", - " /direction=LEFT\n", - " /label=\"ori\"\n", - " /note=\"high-copy-number ColE1/pMB1/pBR322/pUC origin of\n", - " replication\"\n", - " primer_bind complement(286..305)\n", - " /label=\"pBR322ori-F\"\n", - " /note=\"pBR322 origin, forward primer\"\n", - " CDS complement(965..1825)\n", - " /codon_start=1\n", - " /gene=\"bla\"\n", - " /product=\"beta-lactamase\"\n", - " /label=\"AmpR\"\n", - " /note=\"confers resistance to ampicillin, carbenicillin, and\n", - " related antibiotics\"\n", - " /translation=\"MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI\n", - " ELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS\n", - " PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRW\n", - " EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSA\n", - " LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS\n", - " LIKHW\"\n", - " primer_bind 1588..1607\n", - " /label=\"Amp-R\"\n", - " /note=\"Ampicillin resistance gene, reverse primer\"\n", - " promoter complement(1826..1930)\n", - " /gene=\"bla\"\n", - " /label=\"AmpR promoter\"\n", - " primer_bind 1998..2016\n", - " /label=\"pBRforEco\"\n", - " /note=\"pBR322 vectors, upsteam of EcoRI site, forward\n", - " primer\"\n", - " primer_bind complement(2054..2076)\n", - " /label=\"pGEX 3'\"\n", - " /note=\"pGEX vectors, reverse primer\"\n", - " primer_bind 2176..2195\n", - " /label=\"pRS-marker\"\n", - " /note=\"pRS vectors, use to sequence yeast selectable\n", - " marker\"\n", - " promoter 
2276..2294\n", - " /label=\"SP6 promoter\"\n", - " /note=\"promoter for bacteriophage SP6 RNA polymerase\"\n", - " primer_bind 2276..2293\n", - " /label=\"SP6\"\n", - " /note=\"SP6 promoter, forward primer\"\n", - " gene 2407..3763\n", - " /label=\"kanMX\"\n", - " /note=\"yeast selectable marker conferring kanamycin\n", - " resistance (Wach et al., 1994)\"\n", - " promoter 2407..2750\n", - " /label=\"TEF promoter\"\n", - " /note=\"Ashbya gossypii TEF promoter\"\n", - " CDS 2751..3560\n", - " /codon_start=1\n", - " /gene=\"aph(3')-Ia\"\n", - " /product=\"aminoglycoside phosphotransferase\"\n", - " /label=\"KanR\"\n", - " /note=\"confers resistance to kanamycin\"\n", - " /translation=\"MGKEKTHVSRPRLNSNMDADLYGYKWARDNVGQSGATIYRLYGKP\n", - " DAPELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTAIPGKTA\n", - " FQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLAQAQSRMNNGLVDASD\n", - " FDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFSLDNLIFDEGKLIGCIDVGRVGIAD\n", - " RYQDLAILWNCLGEFSPSLQKRLFQKYGIDNPDMNKLQFHLMLDEFF\"\n", - " primer_bind complement(2818..2837)\n", - " /label=\"Kan-R\"\n", - " /note=\"Kanamycin resistance gene, reverse primer\"\n", - " terminator 3566..3763\n", - " /label=\"TEF terminator\"\n", - " /note=\"Ashbya gossypii TEF terminator\"\n", - " primer_bind complement(3867..3886)\n", - " /label=\"T7\"\n", - " /note=\"T7 promoter, forward primer\"\n", - " promoter complement(3868..3886)\n", - " /label=\"T7 promoter\"\n", - " /note=\"promoter for bacteriophage T7 RNA polymerase\"\n", - "ORIGIN\n", - " 1 gaggcggttt gcgtattggg cgctcttccg cttcctcgct cactgactcg ctgcgctcgg\n", - " 61 tcgttcggct gcggcgagcg gtatcagctc actcaaaggc ggtaatacgg ttatccacag\n", - " 121 aatcagggga taacgcagga aagaacatgt gagcaaaagg ccagcaaaag gccaggaacc\n", - " 181 gtaaaaaggc cgcgttgctg gcgtttttcc ataggctccg cccccctgac gagcatcaca\n", - " 241 aaaatcgacg ctcaagtcag aggtggcgaa acccgacagg actataaaga taccaggcgt\n", - " 301 ttccccctgg aagctccctc gtgcgctctc ctgttccgac cctgccgctt accggatacc\n", - " 361 tgtccgcctt tctcccttcg 
ggaagcgtgg cgctttctca tagctcacgc tgtaggtatc\n", - " 421 tcagttcggt gtaggtcgtt cgctccaagc tgggctgtgt gcacgaaccc cccgttcagc\n", - " 481 ccgaccgctg cgccttatcc ggtaactatc gtcttgagtc caacccggta agacacgact\n", - " 541 tatcgccact ggcagcagcc actggtaaca ggattagcag agcgaggtat gtaggcggtg\n", - " 601 ctacagagtt cttgaagtgg tggcctaact acggctacac tagaagaaca gtatttggta\n", - " 661 tctgcgctct gctgaagcca gttaccttcg gaaaaagagt tggtagctct tgatccggca\n", - " 721 aacaaaccac cgctggtagc ggtggttttt ttgtttgcaa gcagcagatt acgcgcagaa\n", - " 781 aaaaaggatc tcaagaagat cctttgatct tttctacggg gtctgacgct cagtggaacg\n", - " 841 aaaactcacg ttaagggatt ttggtcatga gattatcaaa aaggatcttc acctagatcc\n", - " 901 ttttaaatta aaaatgaagt tttaaatcaa tctaaagtat atatgagtaa acttggtctg\n", - " 961 acagttacca atgcttaatc agtgaggcac ctatctcagc gatctgtcta tttcgttcat\n", - " 1021 ccatagttgc ctgactcccc gtcgtgtaga taactacgat acgggagggc ttaccatctg\n", - " 1081 gccccagtgc tgcaatgata ccgcgagacc cacgctcacc ggctccagat ttatcagcaa\n", - " 1141 taaaccagcc agccggaagg gccgagcgca gaagtggtcc tgcaacttta tccgcctcca\n", - " 1201 tccagtctat taattgttgc cgggaagcta gagtaagtag ttcgccagtt aatagtttgc\n", - " 1261 gcaacgttgt tgccattgct acaggcatcg tggtgtcacg ctcgtcgttt ggtatggctt\n", - " 1321 cattcagctc cggttcccaa cgatcaaggc gagttacatg atcccccatg ttgtgcaaaa\n", - " 1381 aagcggttag ctccttcggt cctccgatcg ttgtcagaag taagttggcc gcagtgttat\n", - " 1441 cactcatggt tatggcagca ctgcataatt ctcttactgt catgccatcc gtaagatgct\n", - " 1501 tttctgtgac tggtgagtac tcaaccaagt cattctgaga atagtgtatg cggcgaccga\n", - " 1561 gttgctcttg cccggcgtca atacgggata ataccgcgcc acatagcaga actttaaaag\n", - " 1621 tgctcatcat tggaaaacgt tcttcggggc gaaaactctc aaggatctta ccgctgttga\n", - " 1681 gatccagttc gatgtaaccc actcgtgcac ccaactgatc ttcagcatct tttactttca\n", - " 1741 ccagcgtttc tgggtgagca aaaacaggaa ggcaaaatgc cgcaaaaaag ggaataaggg\n", - " 1801 cgacacggaa atgttgaata ctcatactct tcctttttca atattattga agcatttatc\n", - " 1861 agggttattg tctcatgagc ggatacatat ttgaatgtat ttagaaaaat 
aaacaaatag\n", - " 1921 gggttccgcg cacatttccc cgaaaagtgc cacctgacgt ctaagaaacc attattatca\n", - " 1981 tgacattaac ctataaaaat aggcgtatca cgaggccctt tcgtctcgcg cgtttcggtg\n", - " 2041 atgacggtga aaacctctga cacatgcagc tcccggagac ggtcacagct tgtctgtaag\n", - " 2101 cggatgccgg gagcagacaa gcccgtcagg gcgcgtcagc gggtgttggc gggtgtcggg\n", - " 2161 gctggcttaa ctatgcggca tcagagcaga ttgtactgag agtgcaccat atggacatat\n", - " 2221 tgtcgttaga acgcggctac aattaataca taaccttatg tatcatacac atacgattta\n", - " 2281 ggtgacacta tagaacgcgg ccgccagctg aagcttcgta cgctgcaggt cgacggatcc\n", - " 2341 ccgggttaat taaggcgcgc cagatctgtt tagcttgcct cgtccccgcc gggtcacccg\n", - " 2401 gccagcgaca tggaggccca gaataccctc cttgacagtc ttgacgtgcg cagctcaggg\n", - " 2461 gcatgatgtg actgtcgccc gtacatttag cccatacatc cccatgtata atcatttgca\n", - " 2521 tccatacatt ttgatggccg cacggcgcga agcaaaaatt acggctcctc gctgcagacc\n", - " 2581 tgcgagcagg gaaacgctcc cctcacagac gcgttgaatt gtccccacgc cgcgcccctg\n", - " 2641 tagagaaata taaaaggtta ggatttgcca ctgaggttct tctttcatat acttcctttt\n", - " 2701 aaaatcttgc taggatacag ttctcacatc acatccgaac ataaacaacc atgggtaagg\n", - " 2761 aaaagactca cgtttcgagg ccgcgattaa attccaacat ggatgctgat ttatatgggt\n", - " 2821 ataaatgggc tcgcgataat gtcgggcaat caggtgcgac aatctatcga ttgtatggga\n", - " 2881 agcccgatgc gccagagttg tttctgaaac atggcaaagg tagcgttgcc aatgatgtta\n", - " 2941 cagatgagat ggtcagacta aactggctga cggaatttat gcctcttccg accatcaagc\n", - " 3001 attttatccg tactcctgat gatgcatggt tactcaccac tgcgatcccc ggcaaaacag\n", - " 3061 cattccaggt attagaagaa tatcctgatt caggtgaaaa tattgttgat gcgctggcag\n", - " 3121 tgttcctgcg ccggttgcat tcgattcctg tttgtaattg tccttttaac agcgatcgcg\n", - " 3181 tatttcgtct cgctcaggcg caatcacgaa tgaataacgg tttggttgat gcgagtgatt\n", - " 3241 ttgatgacga gcgtaatggc tggcctgttg aacaagtctg gaaagaaatg cataagcttt\n", - " 3301 tgccattctc accggattca gtcgtcactc atggtgattt ctcacttgat aaccttattt\n", - " 3361 ttgacgaggg gaaattaata ggttgtattg atgttggacg agtcggaatc gcagaccgat\n", - " 3421 
accaggatct tgccatccta tggaactgcc tcggtgagtt ttctccttca ttacagaaac\n", - " 3481 ggctttttca aaaatatggt attgataatc ctgatatgaa taaattgcag tttcatttga\n", - " 3541 tgctcgatga gtttttctaa tcagtactga caataaaaag attcttgttt tcaagaactt\n", - " 3601 gtcatttgta tagttttttt atattgtagt tgttctattt taatcaaatg ttagcgtgat\n", - " 3661 ttatattttt tttcgcctcg acatcatctg cccagatgcg aagttaagtg cgcagaaagt\n", - " 3721 aatatcatgc gtcaatcgta tgtgaatgct ggtcgctata ctgctgtcga ttcgatacta\n", - " 3781 acgccgccat ccagtttaaa cgagctcgaa ttcatcgatg atatcagatc cactagtggc\n", - " 3841 ctatgcggcc gcggatctgc cggtctccct atagtgagtc gtattaattt cgataagcca\n", - " 3901 ggttaacctg cattaatgaa tcggccaacg cgcgggga\n", - "//\n", - "LOCUS CU329670 4538 bp DNA linear PLN 26-APR-2024\n", - "DEFINITION Schizosaccharomyces pombe strain 972h- genome assembly, chromosome:\n", - " I.\n", - "ACCESSION CU329670\n", - "VERSION CU329670.1\n", - "DBLINK BioProject: PRJNA13836\n", - " BioSample: SAMEA3138176\n", - "KEYWORDS .\n", - "SOURCE Schizosaccharomyces pombe (fission yeast)\n", - " ORGANISM Schizosaccharomyces pombe\n", - " Eukaryota; Fungi; Dikarya; Ascomycota; Taphrinomycotina;\n", - " Schizosaccharomycetes; Schizosaccharomycetales;\n", - " Schizosaccharomycetaceae; Schizosaccharomyces.\n", - "REFERENCE 1 (bases 1 to 4538)\n", - " AUTHORS Lang,B.F.\n", - " TITLE The mitochondrial genome of the fission yeast Schizosaccharomyces\n", - " pombe: highly homologous introns are inserted at the same position\n", - " of the otherwise less conserved cox1 genes in Schizosaccharomyces\n", - " pombe and Aspergillus nidulans\n", - " JOURNAL EMBO J 3 (9), 2129-2136 (1984)\n", - " PUBMED 6092057\n", - "REFERENCE 2 (bases 1 to 4538)\n", - " AUTHORS Lang,B.F., Ahne,F. and Bonen,L.\n", - " TITLE The mitochondrial genome of the fission yeast Schizosaccharomyces\n", - " pombe. 
The cytochrome b gene has an intron closely related to the\n", - " first two introns in the Saccharomyces cerevisiae cox1 gene\n", - " JOURNAL J Mol Biol 184 (3), 353-366 (1985)\n", - " PUBMED 4046021\n", - "REFERENCE 3 (bases 1 to 4538)\n", - " AUTHORS Lang,B.F., Cedergren,R. and Gray,M.W.\n", - " TITLE The mitochondrial genome of the fission yeast, Schizosaccharomyces\n", - " pombe. Sequence of the large-subunit ribosomal RNA gene, comparison\n", - " of potential secondary structure in fungal mitochondrial\n", - " large-subunit rRNAs and evolutionary considerations\n", - " JOURNAL Eur J Biochem 169 (3), 527-537 (1987)\n", - " PUBMED 2446871\n", - "REFERENCE 4 (bases 1 to 4538)\n", - " AUTHORS Trinkl,H., Lang,B.F. and Wolf,K.\n", - " TITLE Nucleotide sequence of the gene encoding the small ribosomal RNA in\n", - " the mitochondrial genome of the fission yeast Schizosaccharomyces\n", - " pombe\n", - " JOURNAL Nucleic Acids Res 17 (16), 6730 (1989)\n", - " PUBMED 2780299\n", - "REFERENCE 5 (bases 1 to 4538)\n", - " AUTHORS Wood,V., Gwilliam,R., Rajandream,M.A., Lyne,M., Lyne,R., Stewart,A.,\n", - " Sgouros,J., Peat,N., Hayles,J., Baker,S., Basham,D., Bowman,S.,\n", - " Brooks,K., Brown,D., Brown,S., Chillingworth,T., Churcher,C.,\n", - " Collins,M., Connor,R., Cronin,A., Davis,P., Feltwell,T., Fraser,A.,\n", - " Gentles,S., Goble,A., Hamlin,N., Harris,D., Hidalgo,J., Hodgson,G.,\n", - " Holroyd,S., Hornsby,T., Howarth,S., Huckle,E.J., Hunt,S., Jagels,K.,\n", - " James,K., Jones,L., Jones,M., Leather,S., McDonald,S., McLean,J.,\n", - " Mooney,P., Moule,S., Mungall,K., Murphy,L., Niblett,D., Odell,C.,\n", - " Oliver,K., O'Neil,S., Pearson,D., Quail,M.A., Rabbinowitsch,E.,\n", - " Rutherford,K., Rutter,S., Saunders,D., Seeger,K., Sharp,S.,\n", - " Skelton,J., Simmonds,M., Squares,R., Squares,S., Stevens,K.,\n", - " Taylor,K., Taylor,R.G., Tivey,A., Walsh,S., Warren,T., Whitehead,S.,\n", - " Woodward,J., Volckaert,G., Aert,R., Robben,J., Grymonprez,B.,\n", - " 
Weltjens,I., Vanstreels,E., Rieger,M., Schafer,M., Muller-Auer,S.,\n", - " Gabel,C., Fuchs,M., Dusterhoft,A., Fritzc,C., Holzer,E., Moestl,D.,\n", - " Hilbert,H., Borzym,K., Langer,I., Beck,A., Lehrach,H., Reinhardt,R.,\n", - " Pohl,T.M., Eger,P., Zimmermann,W., Wedler,H., Wambutt,R.,\n", - " Purnelle,B., Goffeau,A., Cadieu,E., Dreano,S., Gloux,S., Lelaure,V.,\n", - " Mottier,S., Galibert,F., Aves,S.J., Xiang,Z., Hunt,C., Moore,K.,\n", - " Hurst,S.M., Lucas,M., Rochet,M., Gaillardin,C., Tallada,V.A.,\n", - " Garzon,A., Thode,G., Daga,R.R., Cruzado,L., Jimenez,J., Sanchez,M.,\n", - " del Rey,F., Benito,J., Dominguez,A., Revuelta,J.L., Moreno,S.,\n", - " Armstrong,J., Forsburg,S.L., Cerutti,L., Lowe,T., McCombie,W.R.,\n", - " Paulsen,I., Potashkin,J., Shpakovski,G.V., Ussery,D., Barrell,B.G.\n", - " and Nurse,P.\n", - " TITLE The genome sequence of Schizosaccharomyces pombe\n", - " JOURNAL Nature 415 (6874), 871-880 (2002)\n", - " PUBMED 11859360\n", - " REMARK Erratum:[Nature 2003 Jan 2;421(6918):94. 
Cerrutti L [corrected to\n", - " Cerutti L]]\n", - "REFERENCE 6\n", - " AUTHORS Wood,V., Gwilliam,R., Rajandream,M.A., Lyne,M., Lyne,R., Stewart,A.,\n", - " Sgouros,J., Peat,N., Hayles,J., Baker,S., Basham,D., Bowman,S.,\n", - " Brooks,K., Brown,D., Brown,S., Chillingworth,T., Churcher,C.,\n", - " Collins,M., Connor,R., Cronin,A., Davis,P., Feltwell,T., Fraser,A.,\n", - " Gentles,S., Goble,A., Hamlin,N., Harris,D., Hidalgo,J., Hodgson,G.,\n", - " Holroyd,S., Hornsby,T., Howarth,S., Huckle,E.J., Hunt,S., Jagels,K.,\n", - " James,K., Jones,L., Jones,M., Leather,S., McDonald,S., McLean,J.,\n", - " Mooney,P., Moule,S., Mungall,K., Murphy,L., Niblett,D., Odell,C.,\n", - " Oliver,K., O'Neil,S., Pearson,D., Quail,M.A., Rabbinowitsch,E.,\n", - " Rutherford,K., Rutter,S., Saunders,D., Seeger,K., Sharp,S.,\n", - " Skelton,J., Simmonds,M., Squares,R., Squares,S., Stevens,K.,\n", - " Taylor,K., Taylor,R.G., Tivey,A., Walsh,S., Warren,T., Whitehead,S.,\n", - " Woodward,J., Volckaert,G., Aert,R., Robben,J., Grymonprez,B.,\n", - " Weltjens,I., Vanstreels,E., Rieger,M., Schafer,M., Muller-Auer,S.,\n", - " Gabel,C., Fuchs,M., Dusterhoft,A., Fritzc,C., Holzer,E., Moestl,D.,\n", - " Hilbert,H., Borzym,K., Langer,I., Beck,A., Lehrach,H., Reinhardt,R.,\n", - " Pohl,T.M., Eger,P., Zimmermann,W., Wedler,H., Wambutt,R.,\n", - " Purnelle,B., Goffeau,A., Cadieu,E., Dreano,S., Gloux,S., Lelaure,V.,\n", - " Mottier,S., Galibert,F., Aves,S.J., Xiang,Z., Hunt,C., Moore,K.,\n", - " Hurst,S.M., Lucas,M., Rochet,M., Gaillardin,C., Tallada,V.A.,\n", - " Garzon,A., Thode,G., Daga,R.R., Cruzado,L., Jimenez,J., Sanchez,M.,\n", - " del Rey,F., Benito,J., Dominguez,A., Revuelta,J.L., Moreno,S.,\n", - " Armstrong,J., Forsburg,S.L., Cerutti,L., Lowe,T., McCombie,W.R.,\n", - " Paulsen,I., Potashkin,J., Shpakovski,G.V., Ussery,D., Barrell,B.G.\n", - " and Nurse,P.\n", - " TITLE The genome sequence of Schizosaccharomyces pombe\n", - " JOURNAL Nature 415 (6874), 871-880 (2002)\n", - " PUBMED 11859360\n", - " 
REMARK Erratum:[Nature 2003 Jan 2;421(6918):94. Cerrutti L [corrected to\n", - " Cerutti L]]\n", - "REFERENCE 7 (bases 1 to 4538)\n", - " AUTHORS Schafer,B., Hansen,M. and Lang,B.F.\n", - " TITLE Transcription and RNA-processing in fission yeast mitochondria\n", - " JOURNAL RNA 11 (5), 785-795 (2005)\n", - " PUBMED 15811919\n", - "REFERENCE 8\n", - " AUTHORS Wood,V.\n", - " CONSRTM The Schizosaccharomyces pombe Genome Sequencing Consortium\n", - " TITLE Direct Submission\n", - " JOURNAL Submitted (29-JUN-2007) European Schizosaccharomyces genome\n", - " sequencing project, Sanger Institute, The Wellcome Trust Genome\n", - " Campus, Hinxton, Cambridge CB10 1SA\n", - "REFERENCE 9\n", - " AUTHORS Wood,V. and Rutherford,K.\n", - " CONSRTM PomBase\n", - " TITLE Direct Submission\n", - " JOURNAL Submitted (13-MAR-2024) University of Cambridge, PomBase, Hopkins\n", - " building, Tennis Court Rd, Cambridge, United Kingdom\n", - "COMMENT On or before Jan 26, 2012 this sequence version replaced\n", - " AL672256.4, AL009197.1, AL009227.1, AL021046.4, AL021809.4,\n", - " AL021813.1, AL021817.2, AL031180.3, AL034486.1, AL034565.1,\n", - " AL034583.1, AL035064.1, AL035248.2, AL035254.1, AL035439.1,\n", - " AL096845.1, AL109734.1, AL109738.1, AL109739.1, AL109770.1,\n", - " AL109820.1, AL109831.1, AL109832.1, AL109951.1, AL109988.1,\n", - " AL110469.1, AL110509.2, AL117210.1, AL117212.1, AL117213.1,\n", - " AL117390.1, AL121732.1, AL121741.1, AL121745.2, AL121764.1,\n", - " AL121765.1, AL121770.1, AL122032.1, AL132667.1, AL132675.1,\n", - " AL132714.1, AL132769.1, AL132779.2, AL132798.2, AL132828.1,\n", - " AL132839.1, AL132983.1, AL132984.1, AL133154.2, AL133156.1,\n", - " AL133157.1, AL133225.2, AL133302.1, AL133357.1, AL133359.1,\n", - " AL133360.1, AL133361.1, AL133442.1, AL133498.1, AL133521.1,\n", - " AL133522.1, AL135751.1, AL136078.1, AL136235.1, AL136499.1,\n", - " AL136521.2, AL136538.1, AL137130.1, AL138666.2, AL138854.1,\n", - " AL139315.1, AL157734.1, AL157811.1, 
AL157872.1, AL157917.1,\n", - " AL157993.1, AL157994.1, AL158056.1, AL159180.1, AL159951.1,\n", - " AL162531.1, AL162631.1, AL163031.1, AL163071.1, AL163191.2,\n", - " AL163481.1, AL163529.1, AL353014.1, AL353860.2, AL355012.1,\n", - " AL355013.1, AL355252.1, AL355452.1, AL355632.1, AL355652.1,\n", - " AL355653.1, AL356333.1, AL356335.1, AL357232.1, AL358272.1,\n", - " AL360054.1, AL360094.1, AL390095.1, AL390274.1, AL390814.1,\n", - " AL391713.1, AL391744.1, AL391746.2, AL391783.1, AL441621.1,\n", - " AL441624.1, AL512486.1, AL512487.1, AL512491.1, AL512493.1,\n", - " AL512496.1, AL512549.1, AL512562.1, AL583902.1, AL590562.1,\n", - " AL590582.1, AL590602.1, AL590605.1, AL590902.2, AL590903.1,\n", - " AL691401.1, AL691402.1, AL691405.1, Z49811.1, Z50112.1, Z50113.1,\n", - " Z50142.1, Z50728.2, Z54096.1, Z54142.2, Z54285.2, Z54308.1,\n", - " Z54328.1, Z54354.1, Z54366.1, Z56276.2, Z64354.1, Z66568.2,\n", - " Z67757.1, Z67961.2, Z67998.1, Z67999.1, Z68136.2, Z68144.1,\n", - " Z68166.1, Z68197.2, Z68198.1, Z68887.1, Z69086.1, Z69239.1,\n", - " Z69240.1, Z69368.1, Z69369.1, Z69380.1, Z69725.1, Z69726.1,\n", - " Z69727.1, Z69728.1, Z69729.1, Z69730.1, Z69731.1, Z69795.1,\n", - " Z69796.1, Z69944.1, Z70043.1, Z70690.1, Z70691.1, Z70721.1,\n", - " Z73099.2, Z73100.2, Z81312.1, Z81317.1, Z94864.1, Z95334.1,\n", - " Z95395.1, Z95396.2, Z97185.1, Z97208.1, Z97209.1, Z97210.2,\n", - " Z98056.2, Z98529.1, Z98530.2, Z98531.2, Z98532.1, Z98533.1,\n", - " Z98559.1, Z98560.1, Z98595.1, Z98596.1, Z98597.1, Z98598.1,\n", - " Z98600.1, Z98601.1, Z98602.1, Z98603.1, Z98762.1, Z98763.1,\n", - " Z98849.1, Z98944.1, Z98974.2, Z98975.1, Z98977.4, Z98978.1,\n", - " Z98979.1, Z98980.1, Z98981.3, Z99091.2, Z99126.1, Z99161.1,\n", - " Z99162.1, Z99163.2, Z99164.2, Z99165.1, Z99166.1, Z99167.1,\n", - " Z99168.1, Z99258.1, Z99259.1, Z99260.2, Z99261.1, Z99262.1,\n", - " Z99292.1, Z99295.1, Z99296.2, Z99531.1, Z99532.2, Z99568.2,\n", - " Z99753.1.\n", - "FEATURES Location/Qualifiers\n", - " 
source 1..4538\n", - " /organism=\"Schizosaccharomyces pombe\"\n", - " /mol_type=\"genomic DNA\"\n", - " /strain=\"972h-\"\n", - " /db_xref=\"taxon:4896\"\n", - " /chromosome=\"I\"\n", - " gene <1..676\n", - " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", - " CDS <1..393\n", - " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", - " /codon_start=1\n", - " /product=\"conserved fungal protein\"\n", - " /protein_id=\"CAC21481.2\"\n", - " /translation=\"MMTRMELRPLEIGFSKALTEVAPVTCQCECWDHNLCSSQASEMDL\n", - " IYQSQDTHSCASKQDAVFQLLSETKIPVPNRYRKISHRLSTLSNKKTLKSQLDRFLSSS\n", - " KKLHNDDVNRGDYCFLLSTPVECSASTNSHSYDCLWNFSCNSFPEYSSYSASETSSVAS\n", - " YSYYSGPNPATPSSSSCNLVNANSLDIYLNINNLKKSKSVPRLRGQFMEPVEHNHPLSK\n", - " SLEEQSSFLEQSKDASSNLTACNRSGSSLSSNFYSSRLSKKTSLASLNKSRASLQHKIM\n", - " SLSRNIIRRVFHKPEVHLDPSASILNLSSSHGESNLTNGLLCQNFKLFQDDWLMEDCAP\n", - " DANFTLYTPLQPWEKRSVKPEIRRPRLNPNFFRVFVLEAQMRRAGKLSANTAGRAQLIY\n", - " LPKPAVTFSTSPLHVEL\"\n", - " gene complement(<1..1972)\n", - " /locus_tag=\"SPOM_SPNCRNA.2846\"\n", - " ncRNA complement(<1..1972)\n", - " /ncRNA_class=\"lncRNA\"\n", - " /locus_tag=\"SPOM_SPNCRNA.2846\"\n", - " /product=\"non-coding RNA\"\n", - " 3'UTR 394..676\n", - " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", - " gene 1001..3538\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " 5'UTR 1001..1173\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " CDS join(1174..1597,1645..3416)\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " /codon_start=1\n", - " /product=\"antiparallel microtubule cross-linking factor\n", - " Ase1\"\n", - " /protein_id=\"CAC21482.1\"\n", - " /translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", - " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", - " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", - " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", - " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", - " 
NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", - " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", - " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", - " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", - " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", - " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", - " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", - " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", - " 3'UTR 3417..3538\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " gene complement(3510..>4538)\n", - " /gene=\"ypt71\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", - " 3'UTR complement(3510..3690)\n", - " /gene=\"ypt71\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", - " CDS complement(join(3691..4137,4192..>4290))\n", - " /gene=\"ypt71\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", - " /codon_start=1\n", - " /product=\"GTPase Ypt71\"\n", - " /protein_id=\"CAC21483.1\"\n", - " /translation=\"MSAQKRVFLKVVILGDSGVGKTCLMNQFVNQKFSREYKATIGADF\n", - " LTKDVVVDDKLVTLQLWDTAGQERFQSLGMAFYRGADCCVIVYNVNNSKSFDSVENWRQ\n", - " EFLYQTSQDECAFPFIIVGNQIDKDASKRAVSLHRALDYCKSKHGSNMIHFEASAKENT\n", - " NVTDLFETVSRLALENESSRDDFVNDFSEPLLLSKPLNNTSSCNC\"\n", - " gene 4049..>4538\n", - " /locus_tag=\"SPOM_SPNCRNA.2847\"\n", - " ncRNA 4049..>4538\n", - " /ncRNA_class=\"lncRNA\"\n", - " /locus_tag=\"SPOM_SPNCRNA.2847\"\n", - " /product=\"non-coding RNA\"\n", - "ORIGIN\n", - " 1 atcatcagac gtgtatttca caagccagaa gtgcatttgg atccaagtgc ctccatttta\n", - " 61 aatctctcat cttcgcatgg cgaaagcaac ctgacaaatg gtttgctttg tcaaaatttc\n", - " 121 aagctttttc aggatgattg gttgatggag gattgtgcgc cagatgccaa tttcactttg\n", - " 181 tacaccccgc ttcaaccctg ggaaaagcga agtgtgaaac ctgaaatcag acgtcctcga\n", - " 241 ttaaatccta attttttccg agtatttgtt ttagaagctc aaatgcgacg agctggaaag\n", - " 301 ctatcagcaa acactgctgg ccgagcccag ttaatttacc tcccaaagcc tgccgttacc\n", - " 361 ttctccacta gccctttgca 
tgttgaattg taaaaattta acgcatgact tatatacatt\n", - " 421 tgcattcttc caagctggtt atatttattt tcattttttt ctcacccaat acttttttat\n", - " 481 ccctactgtc tttatggaca atcgactcac aattgtttct ttttgttgta tatgattttt\n", - " 541 tttttaaagg aaatgggttt cgcgatactg ggttgaatcc caattgcggt taatattaca\n", - " 601 taaaataatt ctcccatagt cctagatcct gtctttgaat atgagcaaat aaaagaattg\n", - " 661 aacaaatcat gaatgctttt ctctcttaga tgatattttg tatgcataag tctaattata\n", - " 721 ttgattacga taagacttaa aaagtaagcc tttgtatcct tttaagcagt atttgaattt\n", - " 781 tcttgtatca tattttaggt agagcaaaag ataccagttt gtagaacttt atgtgcttcc\n", - " 841 ttacattggt atatttcagg cacataaata ttcttcaact tacaattcta agtattttgt\n", - " 901 ttatactaaa aggagctgaa taacgtttat acagtgctga cattgaaatc tatttgcttt\n", - " 961 ctttggaata taagcgcatg ctgagttact ttcgcaggcc aagccatatc caaccaccat\n", - " 1021 ttttgtgcca agcttttatg caaggttaat tccttgtact gcttgttatg ttataatata\n", - " 1081 tcaacatctt aacagttttc atatcttcct ttatattcta ttaattgaat ttcaaacatc\n", - " 1141 gttttattga gctcatttac atcaaccggt tcaatgcaaa cagtaatgat ggatgacatt\n", - " 1201 caaagcactg attctattgc tgaaaaagat aatcactcta ataatgaatc taactttact\n", - " 1261 tggaaagcgt ttcgtgaaca agtggaaaag catttttcta aaattgaaag gcttcaccaa\n", - " 1321 gtccttggaa cagatggaga caattcatca ttatttgagt tgtttacaac ggcaatgaat\n", - " 1381 gcccagcttc atgaaatgga acagtgccag aaaaaacttg aagatgactg tcagcaaaga\n", - " 1441 attgattcaa tcagattttt ggtttcctca ttaaagttaa cggatgatac ttctagtctc\n", - " 1501 aaaattgagt ctcctttaat tcagtgtttg aatcgtttgt caatggtaga aggacaatat\n", - " 1561 atggcacagt atgatcaaaa gttaagtacg attaaaggta tgtaatcgtc tttaatttag\n", - " 1621 acttgtgttt taactgatgt atagaaatgt atcacaaatt ggagtcatat tgtaaccgct\n", - " 1681 taggaagtcc gttcgtttta cctgattttg agaattcatt tttatctgat gtatccgatg\n", - " 1741 cttttactga atctttgaga ggacgcatca acgaagccga aaaggagatt gatgcgagat\n", - " 1801 tagaggttat taattccttt gaagaagaaa ttttgggttt gtggtctgaa ctcggtgttg\n", - " 1861 agcccgctga tgttccacaa tacgaacaat tgcttgaatc ccatactaat 
cgaccaaatg\n", - " 1921 atgtttatgt tactcaagaa cttatcgacc aactttgcaa gcaaaaagaa gttttttccg\n", - " 1981 ctgaaaaaga aaagagaagt gatcatttaa aaagtataca atcagaagtt agcaacttgt\n", - " 2041 ggaataagct tcaagtttct cccaatgaac aaagtcaatt tggcgattca tcaaacatta\n", - " 2101 atcaagaaaa tatttcatta tgggaaactg aacttgaaaa acttcatcag ttaaaaaagg\n", - " 2161 agcatttacc cattttttta gaagactgtc gtcaacaaat tcttcagctt tgggattctc\n", - " 2221 tgttttattc agaagaacaa agaaagtcct ttacacctat gtatgaagac attattacag\n", - " 2281 agcaggttct tacggcccat gaaaactata taaagcaact agaggccgaa gtttctgcta\n", - " 2341 ataagtcctt tttaagctta attaatcgct atgcctcttt aatagaagga aagaaagagc\n", - " 2401 ttgaagctag ttctaatgat gcctctcgtc taacacaacg gggacgccgg gacccaggtt\n", - " 2461 tacttctacg tgaagagaaa atccgtaagc gactttctag agaacttcct aaggttcagt\n", - " 2521 cgctgcttat accagagatt acagcatggg aagaaagaaa tggaaggacg ttcctttttt\n", - " 2581 atgatgaacc acttctcaag atttgccaag aggccactca accaaaatca ttatatagaa\n", - " 2641 gtgcaagtgc tgccgcaaac cgcccgaaaa cagcaactac aacggactct gttaatagaa\n", - " 2701 caccttctca acgagggcgt gtagctgtac cttcaacacc aagtgttagg tccgcttctc\n", - " 2761 gagctatgac gagtccaagg acaccgcttc ctagagtaaa aaacactcaa aatccaagtc\n", - " 2821 gttccattag tgcagaaccg ccatcagcaa ccagtaccgc caatagaaga caccccactg\n", - " 2881 ctaatcgaat tgatataaac gctagattaa acagtgctag tcggtctcga agcgcgaaca\n", - " 2941 tgataagaca aggggcaaat ggtagtgaca gcaatatgtc ttcttcaccc gtttctggaa\n", - " 3001 attccaatac cccttttaac aagtttccaa attctgtatc tcgcaataca cattttgaat\n", - " 3061 ccaagtcacc gcacccaaat tactctcgaa ctcctcatga aacgtattca aaggcttcat\n", - " 3121 ctaagaacgt cccattaagt cctccaaagc agcgtgtagt taatgaacac gctttaaata\n", - " 3181 ttatgtcgga aaaattgcaa agaactaatc tgaaagaaca aacacccgag atggacattg\n", - " 3241 aaaacagctc gcagaacctt cctttttctc ctatgaagat atcccccata agagcatcac\n", - " 3301 ccgtaaagac aattccatca tcaccgtccc ccactaccaa cattttttct gctccactca\n", - " 3361 acaatattac aaattgtaca ccgatggagg atgaatgggg agaagaaggc ttttaagctt\n", - " 3421 
cttatttacc taatcgatca aatttaaata tacatatttt tgcatatgaa tacagcatat\n", - " 3481 agataattca taaaagttta ttaactgagg tcataattaa aagactattt acacctaaaa\n", - " 3541 aaaaacgtgt atcaatagag ggaaaagaga agaattaaga acagaaagta accatagttt\n", - " 3601 tgttaaaata gcaatgtaaa aaaatattat gaaaagaaaa cgtatagcac attttgaaat\n", - " 3661 gtaaaagaat ctgagagagc gtgtgaatat ctagcaatta caagaagatg tattattcaa\n", - " 3721 aggctttgaa agaagcaaag gttcagagaa gtcattaaca aagtcatctc tcgagctttc\n", - " 3781 attttctaaa gctaaacgac tgactgtttc gaaaaggtca gtaacgtttg tattttcttt\n", - " 3841 tgcactagct tcaaaatgaa tcatatttga tccatgtttg gatttgcaat agtcaagagc\n", - " 3901 tcgatgaaga gatacggctc gtttagacgc gtctttgtcg atttgatttc caacgataat\n", - " 3961 gaaagggaat gcacattcat cttgtgaagt ttgatataaa aattcttgcc tccagttttc\n", - " 4021 tactgagtca aaagacttcg agttattcac attataaaca attacacaac aatcggcccc\n", - " 4081 tctgtaaaaa gccattccca ggctttgaaa tcgttcttga ccagcagtat cccaaagctg\n", - " 4141 tttataatta gcaaacgaat ttagatgggc ggaacttata ttggaactta cctgtaatgt\n", - " 4201 gaccaatttg tcgtcaacca caacgtcctt ggttaaaaaa tcagcaccga tggtagcttt\n", - " 4261 atattcgcga ctaaactttt gattgacgaa ctaaaatgac gatgttaaca aattgccaaa\n", - " 4321 gcaatactca tagagaagct gatgtaaaga tcgttaacca tatttgagct agtatttaat\n", - " 4381 aacaaagtga ataaatttta aaagcaatca ccttgtagcg acaaataaca acttatcgac\n", - " 4441 ataaaatcaa tgggaaattg cagtattgga ttttacagct caatacaaaa accaaaaaga\n", - " 4501 aaaatatact gaacgtataa aatttaacgc ttcaattg\n", - "//\n" - ] - } - ], - "source": [ - "# Parsing the files\n", - "pFA6akanMX6_path = \"./pFA6a-kanMX6.gb\"\n", - "ase1_path = \"./CU329670.gb\"\n", - "vector = parse(pFA6akanMX6_path)[0]\n", - "pombe_chromosome_I = parse(ase1_path)[0]\n", - "\n", - "# Printing the parsed files\n", - "\n", - "print(vector.format(\"gb\"))\n", - "print(pombe_chromosome_I.format(\"gb\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"ACCATGTCGACATGCAAACAGTAATGATGGA , Tm: 57.24061148156318\n", - "GGCGCGCCATTAAAAGCCTTCTTCTCCC , Tm: 56.64459495003314\n" - ] - } - ], - "source": [ - "from pydna.design import primer_design\n", - "#Finding the feature containing the CDS with ase1 as a type qualifier\n", - "gene = next(f for f in pombe_chromosome_I.features if f.type == \"CDS\" and\n", - " \"gene\" in f.qualifiers and\n", - " \"ase1\" in f.qualifiers[\"gene\"])\n", - "\n", - "# Using the primer_design function to design primers to amplify the CDS\n", - "# `min` and `max` can be used on a SeqFeature to get the start (leftmost) and end (rightmost) positions\n", - "# this works both on feature with SimpleLocation and CompoundLocation\n", - "amplicon = primer_design(pombe_chromosome_I[min(gene):max(gene)], target_tm=55)\n", - "\n", - "fwd_align, rvs_align = amplicon.primers()\n", - "fwd_primer_ase1 = Dseqrecord(\"ACCATGTCGAC\") + fwd_align # Adding a SalI cut site\n", - "rvs_primer_ase1 = Dseqrecord(\"GGCGCGCCAT\") + rvs_align # Adding a AscI cut site\n", - "\n", - "# Printing out the primers\n", - "\n", - "print(fwd_primer_ase1.seq, ', Tm: ', tm_default(fwd_align))\n", - "print(rvs_primer_ase1.seq, ', Tm: ', tm_default(rvs_align))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS 2263bp_PCR_prod 2263 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_description_description.\n", - "ACCESSION 2263bp\n", - "VERSION 2263bp\n", - "DBLINK BioProject: PRJNA13836\n", - " BioSample: SAMEA3138176\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " CDS join(12..435,483..2254)\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " /codon_start=1\n", - " /product=\"antiparallel microtubule cross-linking factor\n", - " Ase1\"\n", - " /protein_id=\"CAC21482.1\"\n", - " 
/translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", - " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", - " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", - " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", - " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", - " NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", - " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", - " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", - " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", - " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", - " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", - " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", - " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", - " primer_bind 12..31\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer\n", - " sequence:ACCATGTCGACATGCAAACAGTAATGATGGA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(2236..2254)\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer\n", - " sequence:GGCGCGCCATTAAAAGCCTTCTTCTCCC\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 accatgtcga catgcaaaca gtaatgatgg atgacattca aagcactgat tctattgctg\n", - " 61 aaaaagataa tcactctaat aatgaatcta actttacttg gaaagcgttt cgtgaacaag\n", - " 121 tggaaaagca tttttctaaa attgaaaggc ttcaccaagt ccttggaaca gatggagaca\n", - " 181 attcatcatt atttgagttg tttacaacgg caatgaatgc ccagcttcat gaaatggaac\n", - " 241 agtgccagaa aaaacttgaa gatgactgtc agcaaagaat tgattcaatc agatttttgg\n", - " 301 tttcctcatt aaagttaacg gatgatactt ctagtctcaa aattgagtct cctttaattc\n", - " 361 agtgtttgaa tcgtttgtca atggtagaag gacaatatat ggcacagtat gatcaaaagt\n", - " 421 taagtacgat taaaggtatg taatcgtctt taatttagac ttgtgtttta actgatgtat\n", - " 481 agaaatgtat cacaaattgg agtcatattg taaccgctta ggaagtccgt 
tcgttttacc\n", - " 541 tgattttgag aattcatttt tatctgatgt atccgatgct tttactgaat ctttgagagg\n", - " 601 acgcatcaac gaagccgaaa aggagattga tgcgagatta gaggttatta attcctttga\n", - " 661 agaagaaatt ttgggtttgt ggtctgaact cggtgttgag cccgctgatg ttccacaata\n", - " 721 cgaacaattg cttgaatccc atactaatcg accaaatgat gtttatgtta ctcaagaact\n", - " 781 tatcgaccaa ctttgcaagc aaaaagaagt tttttccgct gaaaaagaaa agagaagtga\n", - " 841 tcatttaaaa agtatacaat cagaagttag caacttgtgg aataagcttc aagtttctcc\n", - " 901 caatgaacaa agtcaatttg gcgattcatc aaacattaat caagaaaata tttcattatg\n", - " 961 ggaaactgaa cttgaaaaac ttcatcagtt aaaaaaggag catttaccca tttttttaga\n", - " 1021 agactgtcgt caacaaattc ttcagctttg ggattctctg ttttattcag aagaacaaag\n", - " 1081 aaagtccttt acacctatgt atgaagacat tattacagag caggttctta cggcccatga\n", - " 1141 aaactatata aagcaactag aggccgaagt ttctgctaat aagtcctttt taagcttaat\n", - " 1201 taatcgctat gcctctttaa tagaaggaaa gaaagagctt gaagctagtt ctaatgatgc\n", - " 1261 ctctcgtcta acacaacggg gacgccggga cccaggttta cttctacgtg aagagaaaat\n", - " 1321 ccgtaagcga ctttctagag aacttcctaa ggttcagtcg ctgcttatac cagagattac\n", - " 1381 agcatgggaa gaaagaaatg gaaggacgtt ccttttttat gatgaaccac ttctcaagat\n", - " 1441 ttgccaagag gccactcaac caaaatcatt atatagaagt gcaagtgctg ccgcaaaccg\n", - " 1501 cccgaaaaca gcaactacaa cggactctgt taatagaaca ccttctcaac gagggcgtgt\n", - " 1561 agctgtacct tcaacaccaa gtgttaggtc cgcttctcga gctatgacga gtccaaggac\n", - " 1621 accgcttcct agagtaaaaa acactcaaaa tccaagtcgt tccattagtg cagaaccgcc\n", - " 1681 atcagcaacc agtaccgcca atagaagaca ccccactgct aatcgaattg atataaacgc\n", - " 1741 tagattaaac agtgctagtc ggtctcgaag cgcgaacatg ataagacaag gggcaaatgg\n", - " 1801 tagtgacagc aatatgtctt cttcacccgt ttctggaaat tccaataccc cttttaacaa\n", - " 1861 gtttccaaat tctgtatctc gcaatacaca ttttgaatcc aagtcaccgc acccaaatta\n", - " 1921 ctctcgaact cctcatgaaa cgtattcaaa ggcttcatct aagaacgtcc cattaagtcc\n", - " 1981 tccaaagcag cgtgtagtta atgaacacgc tttaaatatt atgtcggaaa aattgcaaag\n", - " 2041 
aactaatctg aaagaacaaa cacccgagat ggacattgaa aacagctcgc agaaccttcc\n", - " 2101 tttttctcct atgaagatat cccccataag agcatcaccc gtaaagacaa ttccatcatc\n", - " 2161 accgtccccc actaccaaca ttttttctgc tccactcaac aatattacaa attgtacacc\n", - " 2221 gatggaggat gaatggggag aagaaggctt ttaatggcgc gcc\n", - "//\n" - ] - } - ], - "source": [ - "# Performing a PCR to check that the primers are specific. An error message is returned if otherwise.\n", - "\n", - "pcr_product = pcr(fwd_primer_ase1, rvs_primer_ase1, pombe_chromosome_I)\n", - "\n", - "# Printing out the PCR results\n", - "\n", - "print(pcr_product.format(\"gb\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(Dseqrecord(-30), Dseqrecord(-3916))\n", - "(Dseqrecord(-10), Dseqrecord(-2255), Dseqrecord(-6))\n" - ] - } - ], - "source": [ - "# Cleaving the cloning vector with restriction enzymes\n", - "\n", - "plasmid_digests = vector.cut(SalI, AscI)\n", - "\n", - "# Cleaving the gene fragment with restriction enzymes\n", - "\n", - "gene_digests = Dseqrecord(pcr_product).cut(SalI, AscI)\n", - "\n", - "# Printing out the digests\n", - "print(plasmid_digests) \n", - "print(gene_digests)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS name 6163 bp DNA circular UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 52..1408\n", - " /label=\"kanMX\"\n", - " /note=\"yeast selectable marker conferring kanamycin\n", - " resistance (Wach et al., 1994)\"\n", - " promoter 52..395\n", - " /label=\"TEF promoter\"\n", - " /note=\"Ashbya gossypii TEF promoter\"\n", - " CDS 396..1205\n", - " /codon_start=1\n", - " /gene=\"aph(3')-Ia\"\n", - " /product=\"aminoglycoside 
phosphotransferase\"\n", - " /label=\"KanR\"\n", - " /note=\"confers resistance to kanamycin\"\n", - " /translation=\"MGKEKTHVSRPRLNSNMDADLYGYKWARDNVGQSGATIYRLYGKP\n", - " DAPELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTAIPGKTA\n", - " FQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLAQAQSRMNNGLVDASD\n", - " FDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFSLDNLIFDEGKLIGCIDVGRVGIAD\n", - " RYQDLAILWNCLGEFSPSLQKRLFQKYGIDNPDMNKLQFHLMLDEFF\"\n", - " primer_bind complement(463..482)\n", - " /label=\"Kan-R\"\n", - " /note=\"Kanamycin resistance gene, reverse primer\"\n", - " terminator 1211..1408\n", - " /label=\"TEF terminator\"\n", - " /note=\"Ashbya gossypii TEF terminator\"\n", - " primer_bind complement(1512..1531)\n", - " /label=\"T7\"\n", - " /note=\"T7 promoter, forward primer\"\n", - " promoter complement(1513..1531)\n", - " /label=\"T7 promoter\"\n", - " /note=\"promoter for bacteriophage T7 RNA polymerase\"\n", - " primer_bind complement(1618..1635)\n", - " /label=\"L4440\"\n", - " /note=\"L4440 vector, forward primer\"\n", - " rep_origin complement(1789..2377)\n", - " /direction=LEFT\n", - " /label=\"ori\"\n", - " /note=\"high-copy-number ColE1/pMB1/pBR322/pUC origin of\n", - " replication\"\n", - " primer_bind complement(1869..1888)\n", - " /label=\"pBR322ori-F\"\n", - " /note=\"pBR322 origin, forward primer\"\n", - " CDS complement(2548..3408)\n", - " /codon_start=1\n", - " /gene=\"bla\"\n", - " /product=\"beta-lactamase\"\n", - " /label=\"AmpR\"\n", - " /note=\"confers resistance to ampicillin, carbenicillin, and\n", - " related antibiotics\"\n", - " /translation=\"MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI\n", - " ELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS\n", - " PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRW\n", - " EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSA\n", - " LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS\n", - " LIKHW\"\n", - " primer_bind 3171..3190\n", - " /label=\"Amp-R\"\n", - " 
/note=\"Ampicillin resistance gene, reverse primer\"\n", - " promoter complement(3409..3513)\n", - " /gene=\"bla\"\n", - " /label=\"AmpR promoter\"\n", - " primer_bind 3581..3599\n", - " /label=\"pBRforEco\"\n", - " /note=\"pBR322 vectors, upsteam of EcoRI site, forward\n", - " primer\"\n", - " primer_bind complement(3637..3659)\n", - " /label=\"pGEX 3'\"\n", - " /note=\"pGEX vectors, reverse primer\"\n", - " primer_bind 3759..3778\n", - " /label=\"pRS-marker\"\n", - " /note=\"pRS vectors, use to sequence yeast selectable\n", - " marker\"\n", - " promoter 3859..3877\n", - " /label=\"SP6 promoter\"\n", - " /note=\"promoter for bacteriophage SP6 RNA polymerase\"\n", - " primer_bind 3859..3876\n", - " /label=\"SP6\"\n", - " /note=\"SP6 promoter, forward primer\"\n", - " CDS join(3918..4341,4389..6160)\n", - " /gene=\"ase1\"\n", - " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", - " /codon_start=1\n", - " /product=\"antiparallel microtubule cross-linking factor\n", - " Ase1\"\n", - " /protein_id=\"CAC21482.1\"\n", - " /translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", - " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", - " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", - " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", - " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", - " NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", - " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", - " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", - " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", - " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", - " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", - " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", - " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", - " primer_bind 3918..3937\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer\n", - " 
sequence:ACCATGTCGACATGCAAACAGTAATGATGGA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(6142..6160)\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer\n", - " sequence:GGCGCGCCATTAAAAGCCTTCTTCTCCC\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 cgcgccagat ctgtttagct tgcctcgtcc ccgccgggtc acccggccag cgacatggag\n", - " 61 gcccagaata ccctccttga cagtcttgac gtgcgcagct caggggcatg atgtgactgt\n", - " 121 cgcccgtaca tttagcccat acatccccat gtataatcat ttgcatccat acattttgat\n", - " 181 ggccgcacgg cgcgaagcaa aaattacggc tcctcgctgc agacctgcga gcagggaaac\n", - " 241 gctcccctca cagacgcgtt gaattgtccc cacgccgcgc ccctgtagag aaatataaaa\n", - " 301 ggttaggatt tgccactgag gttcttcttt catatacttc cttttaaaat cttgctagga\n", - " 361 tacagttctc acatcacatc cgaacataaa caaccatggg taaggaaaag actcacgttt\n", - " 421 cgaggccgcg attaaattcc aacatggatg ctgatttata tgggtataaa tgggctcgcg\n", - " 481 ataatgtcgg gcaatcaggt gcgacaatct atcgattgta tgggaagccc gatgcgccag\n", - " 541 agttgtttct gaaacatggc aaaggtagcg ttgccaatga tgttacagat gagatggtca\n", - " 601 gactaaactg gctgacggaa tttatgcctc ttccgaccat caagcatttt atccgtactc\n", - " 661 ctgatgatgc atggttactc accactgcga tccccggcaa aacagcattc caggtattag\n", - " 721 aagaatatcc tgattcaggt gaaaatattg ttgatgcgct ggcagtgttc ctgcgccggt\n", - " 781 tgcattcgat tcctgtttgt aattgtcctt ttaacagcga tcgcgtattt cgtctcgctc\n", - " 841 aggcgcaatc acgaatgaat aacggtttgg ttgatgcgag tgattttgat gacgagcgta\n", - " 901 atggctggcc tgttgaacaa gtctggaaag aaatgcataa gcttttgcca ttctcaccgg\n", - " 961 attcagtcgt cactcatggt gatttctcac ttgataacct tatttttgac gaggggaaat\n", - " 1021 taataggttg tattgatgtt ggacgagtcg gaatcgcaga ccgataccag gatcttgcca\n", - " 1081 tcctatggaa ctgcctcggt gagttttctc cttcattaca gaaacggctt tttcaaaaat\n", - " 1141 atggtattga taatcctgat atgaataaat tgcagtttca tttgatgctc gatgagtttt\n", - " 1201 tctaatcagt actgacaata aaaagattct tgttttcaag aacttgtcat ttgtatagtt\n", 
- " 1261 tttttatatt gtagttgttc tattttaatc aaatgttagc gtgatttata ttttttttcg\n", - " 1321 cctcgacatc atctgcccag atgcgaagtt aagtgcgcag aaagtaatat catgcgtcaa\n", - " 1381 tcgtatgtga atgctggtcg ctatactgct gtcgattcga tactaacgcc gccatccagt\n", - " 1441 ttaaacgagc tcgaattcat cgatgatatc agatccacta gtggcctatg cggccgcgga\n", - " 1501 tctgccggtc tccctatagt gagtcgtatt aatttcgata agccaggtta acctgcatta\n", - " 1561 atgaatcggc caacgcgcgg ggagaggcgg tttgcgtatt gggcgctctt ccgcttcctc\n", - " 1621 gctcactgac tcgctgcgct cggtcgttcg gctgcggcga gcggtatcag ctcactcaaa\n", - " 1681 ggcggtaata cggttatcca cagaatcagg ggataacgca ggaaagaaca tgtgagcaaa\n", - " 1741 aggccagcaa aaggccagga accgtaaaaa ggccgcgttg ctggcgtttt tccataggct\n", - " 1801 ccgcccccct gacgagcatc acaaaaatcg acgctcaagt cagaggtggc gaaacccgac\n", - " 1861 aggactataa agataccagg cgtttccccc tggaagctcc ctcgtgcgct ctcctgttcc\n", - " 1921 gaccctgccg cttaccggat acctgtccgc ctttctccct tcgggaagcg tggcgctttc\n", - " 1981 tcatagctca cgctgtaggt atctcagttc ggtgtaggtc gttcgctcca agctgggctg\n", - " 2041 tgtgcacgaa ccccccgttc agcccgaccg ctgcgcctta tccggtaact atcgtcttga\n", - " 2101 gtccaacccg gtaagacacg acttatcgcc actggcagca gccactggta acaggattag\n", - " 2161 cagagcgagg tatgtaggcg gtgctacaga gttcttgaag tggtggccta actacggcta\n", - " 2221 cactagaaga acagtatttg gtatctgcgc tctgctgaag ccagttacct tcggaaaaag\n", - " 2281 agttggtagc tcttgatccg gcaaacaaac caccgctggt agcggtggtt tttttgtttg\n", - " 2341 caagcagcag attacgcgca gaaaaaaagg atctcaagaa gatcctttga tcttttctac\n", - " 2401 ggggtctgac gctcagtgga acgaaaactc acgttaaggg attttggtca tgagattatc\n", - " 2461 aaaaaggatc ttcacctaga tccttttaaa ttaaaaatga agttttaaat caatctaaag\n", - " 2521 tatatatgag taaacttggt ctgacagtta ccaatgctta atcagtgagg cacctatctc\n", - " 2581 agcgatctgt ctatttcgtt catccatagt tgcctgactc cccgtcgtgt agataactac\n", - " 2641 gatacgggag ggcttaccat ctggccccag tgctgcaatg ataccgcgag acccacgctc\n", - " 2701 accggctcca gatttatcag caataaacca gccagccgga agggccgagc gcagaagtgg\n", - " 2761 tcctgcaact 
ttatccgcct ccatccagtc tattaattgt tgccgggaag ctagagtaag\n", - " 2821 tagttcgcca gttaatagtt tgcgcaacgt tgttgccatt gctacaggca tcgtggtgtc\n", - " 2881 acgctcgtcg tttggtatgg cttcattcag ctccggttcc caacgatcaa ggcgagttac\n", - " 2941 atgatccccc atgttgtgca aaaaagcggt tagctccttc ggtcctccga tcgttgtcag\n", - " 3001 aagtaagttg gccgcagtgt tatcactcat ggttatggca gcactgcata attctcttac\n", - " 3061 tgtcatgcca tccgtaagat gcttttctgt gactggtgag tactcaacca agtcattctg\n", - " 3121 agaatagtgt atgcggcgac cgagttgctc ttgcccggcg tcaatacggg ataataccgc\n", - " 3181 gccacatagc agaactttaa aagtgctcat cattggaaaa cgttcttcgg ggcgaaaact\n", - " 3241 ctcaaggatc ttaccgctgt tgagatccag ttcgatgtaa cccactcgtg cacccaactg\n", - " 3301 atcttcagca tcttttactt tcaccagcgt ttctgggtga gcaaaaacag gaaggcaaaa\n", - " 3361 tgccgcaaaa aagggaataa gggcgacacg gaaatgttga atactcatac tcttcctttt\n", - " 3421 tcaatattat tgaagcattt atcagggtta ttgtctcatg agcggataca tatttgaatg\n", - " 3481 tatttagaaa aataaacaaa taggggttcc gcgcacattt ccccgaaaag tgccacctga\n", - " 3541 cgtctaagaa accattatta tcatgacatt aacctataaa aataggcgta tcacgaggcc\n", - " 3601 ctttcgtctc gcgcgtttcg gtgatgacgg tgaaaacctc tgacacatgc agctcccgga\n", - " 3661 gacggtcaca gcttgtctgt aagcggatgc cgggagcaga caagcccgtc agggcgcgtc\n", - " 3721 agcgggtgtt ggcgggtgtc ggggctggct taactatgcg gcatcagagc agattgtact\n", - " 3781 gagagtgcac catatggaca tattgtcgtt agaacgcggc tacaattaat acataacctt\n", - " 3841 atgtatcata cacatacgat ttaggtgaca ctatagaacg cggccgccag ctgaagcttc\n", - " 3901 gtacgctgca ggtcgacatg caaacagtaa tgatggatga cattcaaagc actgattcta\n", - " 3961 ttgctgaaaa agataatcac tctaataatg aatctaactt tacttggaaa gcgtttcgtg\n", - " 4021 aacaagtgga aaagcatttt tctaaaattg aaaggcttca ccaagtcctt ggaacagatg\n", - " 4081 gagacaattc atcattattt gagttgttta caacggcaat gaatgcccag cttcatgaaa\n", - " 4141 tggaacagtg ccagaaaaaa cttgaagatg actgtcagca aagaattgat tcaatcagat\n", - " 4201 ttttggtttc ctcattaaag ttaacggatg atacttctag tctcaaaatt gagtctcctt\n", - " 4261 taattcagtg tttgaatcgt ttgtcaatgg 
tagaaggaca atatatggca cagtatgatc\n", - " 4321 aaaagttaag tacgattaaa ggtatgtaat cgtctttaat ttagacttgt gttttaactg\n", - " 4381 atgtatagaa atgtatcaca aattggagtc atattgtaac cgcttaggaa gtccgttcgt\n", - " 4441 tttacctgat tttgagaatt catttttatc tgatgtatcc gatgctttta ctgaatcttt\n", - " 4501 gagaggacgc atcaacgaag ccgaaaagga gattgatgcg agattagagg ttattaattc\n", - " 4561 ctttgaagaa gaaattttgg gtttgtggtc tgaactcggt gttgagcccg ctgatgttcc\n", - " 4621 acaatacgaa caattgcttg aatcccatac taatcgacca aatgatgttt atgttactca\n", - " 4681 agaacttatc gaccaacttt gcaagcaaaa agaagttttt tccgctgaaa aagaaaagag\n", - " 4741 aagtgatcat ttaaaaagta tacaatcaga agttagcaac ttgtggaata agcttcaagt\n", - " 4801 ttctcccaat gaacaaagtc aatttggcga ttcatcaaac attaatcaag aaaatatttc\n", - " 4861 attatgggaa actgaacttg aaaaacttca tcagttaaaa aaggagcatt tacccatttt\n", - " 4921 tttagaagac tgtcgtcaac aaattcttca gctttgggat tctctgtttt attcagaaga\n", - " 4981 acaaagaaag tcctttacac ctatgtatga agacattatt acagagcagg ttcttacggc\n", - " 5041 ccatgaaaac tatataaagc aactagaggc cgaagtttct gctaataagt cctttttaag\n", - " 5101 cttaattaat cgctatgcct ctttaataga aggaaagaaa gagcttgaag ctagttctaa\n", - " 5161 tgatgcctct cgtctaacac aacggggacg ccgggaccca ggtttacttc tacgtgaaga\n", - " 5221 gaaaatccgt aagcgacttt ctagagaact tcctaaggtt cagtcgctgc ttataccaga\n", - " 5281 gattacagca tgggaagaaa gaaatggaag gacgttcctt ttttatgatg aaccacttct\n", - " 5341 caagatttgc caagaggcca ctcaaccaaa atcattatat agaagtgcaa gtgctgccgc\n", - " 5401 aaaccgcccg aaaacagcaa ctacaacgga ctctgttaat agaacacctt ctcaacgagg\n", - " 5461 gcgtgtagct gtaccttcaa caccaagtgt taggtccgct tctcgagcta tgacgagtcc\n", - " 5521 aaggacaccg cttcctagag taaaaaacac tcaaaatcca agtcgttcca ttagtgcaga\n", - " 5581 accgccatca gcaaccagta ccgccaatag aagacacccc actgctaatc gaattgatat\n", - " 5641 aaacgctaga ttaaacagtg ctagtcggtc tcgaagcgcg aacatgataa gacaaggggc\n", - " 5701 aaatggtagt gacagcaata tgtcttcttc acccgtttct ggaaattcca ataccccttt\n", - " 5761 taacaagttt ccaaattctg tatctcgcaa tacacatttt gaatccaagt 
caccgcaccc\n", - " 5821 aaattactct cgaactcctc atgaaacgta ttcaaaggct tcatctaaga acgtcccatt\n", - " 5881 aagtcctcca aagcagcgtg tagttaatga acacgcttta aatattatgt cggaaaaatt\n", - " 5941 gcaaagaact aatctgaaag aacaaacacc cgagatggac attgaaaaca gctcgcagaa\n", - " 6001 ccttcctttt tctcctatga agatatcccc cataagagca tcacccgtaa agacaattcc\n", - " 6061 atcatcaccg tcccccacta ccaacatttt ttctgctcca ctcaacaata ttacaaattg\n", - " 6121 tacaccgatg gaggatgaat ggggagaaga aggcttttaa tgg\n", - "//\n" - ] - } - ], - "source": [ - "# Ligating, then circularising the synthetic plasmid\n", - "\n", - "synthetic_vector = plasmid_digests[1] + gene_digests [1]\n", - "synthetic_vector = synthetic_vector.looped()\n", - "\n", - "# Printing out the completed cloning vector\n", - "\n", - "print(synthetic_vector.format(\"gb\"))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example of a Plasmid Restriction/Ligation Cloning\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "This example showcases a workflow of modelling molecular cloning with restriction enzymes, PCR, and ligases, to clone gene fragments into plasmids. This example constructs a synthetic plasmid by cloning the ase1 gene, which encodes a microtubule associated protein responsible for mitotic spindle assembly, into the pFA6a-kanMX6 cloning vector:\n", + "\n", + "1. The ase1 gene fragment is first cloned from a portion of the _S. pombe_ genome through PCR:\n", + "2. The pFA6a-kanMX6 cloning vector is then cleaved with AscI and SalI. 
The ase1 gene fragment is also cleaved with SalI and AscI\n", + "3. The fragment is ligated with the linearized pFA6a-kanMX6 vector.\n", + "\n", + "Source files can be found alongside this notebook, if you would like to follow along. Annotations are made alongside the code to describe key steps." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing all necessary classes and methods\n", + "\n", + "from pydna.parsers import parse\n", + "from pydna.tm import tm_default\n", + "from pydna.amplify import pcr\n", + "from pydna.dseqrecord import Dseqrecord\n", + "from Bio.Restriction import SalI, AscI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS pFA6a-kanMX6 3938 bp ds-DNA circular SYN 16-JUN-2022\n", + "DEFINITION synthetic circular DNA.\n", + "ACCESSION .\n", + "VERSION .\n", + "KEYWORDS pFA6a-kanMX6.\n", + "SOURCE synthetic DNA construct\n", + " ORGANISM synthetic DNA construct\n", + " .\n", + "REFERENCE 1 (bases 1 to 3938)\n", + " AUTHORS Bahler J, Wu JQ, Longtine MS, Shah NG, McKenzie A 3rd, Steever AB,\n", + " Wach A, Philippsen P, Pringle JR\n", + " TITLE Heterologous modules for efficient and versatile PCR-based gene\n", + " targeting in Schizosaccharomyces pombe.\n", + " JOURNAL Yeast. 
1998 Jul;14(10):943-51.\n", + " PUBMED 9717240\n", + "REFERENCE 2 (bases 1 to 3938)\n", + " AUTHORS .\n", + " TITLE Direct Submission\n", + " JOURNAL Exported Jun 16, 2022 from SnapGene Server 1.1.58\n", + " http://www.snapgene.com\n", + "FEATURES Location/Qualifiers\n", + " source 1..3938\n", + " /organism=\"synthetic DNA construct\"\n", + " /mol_type=\"other DNA\"\n", + " primer_bind complement(35..52)\n", + " /label=\"L4440\"\n", + " /note=\"L4440 vector, forward primer\"\n", + " rep_origin complement(206..794)\n", + " /direction=LEFT\n", + " /label=\"ori\"\n", + " /note=\"high-copy-number ColE1/pMB1/pBR322/pUC origin of\n", + " replication\"\n", + " primer_bind complement(286..305)\n", + " /label=\"pBR322ori-F\"\n", + " /note=\"pBR322 origin, forward primer\"\n", + " CDS complement(965..1825)\n", + " /codon_start=1\n", + " /gene=\"bla\"\n", + " /product=\"beta-lactamase\"\n", + " /label=\"AmpR\"\n", + " /note=\"confers resistance to ampicillin, carbenicillin, and\n", + " related antibiotics\"\n", + " /translation=\"MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI\n", + " ELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS\n", + " PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRW\n", + " EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSA\n", + " LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS\n", + " LIKHW\"\n", + " primer_bind 1588..1607\n", + " /label=\"Amp-R\"\n", + " /note=\"Ampicillin resistance gene, reverse primer\"\n", + " promoter complement(1826..1930)\n", + " /gene=\"bla\"\n", + " /label=\"AmpR promoter\"\n", + " primer_bind 1998..2016\n", + " /label=\"pBRforEco\"\n", + " /note=\"pBR322 vectors, upsteam of EcoRI site, forward\n", + " primer\"\n", + " primer_bind complement(2054..2076)\n", + " /label=\"pGEX 3'\"\n", + " /note=\"pGEX vectors, reverse primer\"\n", + " primer_bind 2176..2195\n", + " /label=\"pRS-marker\"\n", + " /note=\"pRS vectors, use to sequence yeast selectable\n", + " marker\"\n", + " promoter 
2276..2294\n", + " /label=\"SP6 promoter\"\n", + " /note=\"promoter for bacteriophage SP6 RNA polymerase\"\n", + " primer_bind 2276..2293\n", + " /label=\"SP6\"\n", + " /note=\"SP6 promoter, forward primer\"\n", + " gene 2407..3763\n", + " /label=\"kanMX\"\n", + " /note=\"yeast selectable marker conferring kanamycin\n", + " resistance (Wach et al., 1994)\"\n", + " promoter 2407..2750\n", + " /label=\"TEF promoter\"\n", + " /note=\"Ashbya gossypii TEF promoter\"\n", + " CDS 2751..3560\n", + " /codon_start=1\n", + " /gene=\"aph(3')-Ia\"\n", + " /product=\"aminoglycoside phosphotransferase\"\n", + " /label=\"KanR\"\n", + " /note=\"confers resistance to kanamycin\"\n", + " /translation=\"MGKEKTHVSRPRLNSNMDADLYGYKWARDNVGQSGATIYRLYGKP\n", + " DAPELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTAIPGKTA\n", + " FQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLAQAQSRMNNGLVDASD\n", + " FDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFSLDNLIFDEGKLIGCIDVGRVGIAD\n", + " RYQDLAILWNCLGEFSPSLQKRLFQKYGIDNPDMNKLQFHLMLDEFF\"\n", + " primer_bind complement(2818..2837)\n", + " /label=\"Kan-R\"\n", + " /note=\"Kanamycin resistance gene, reverse primer\"\n", + " terminator 3566..3763\n", + " /label=\"TEF terminator\"\n", + " /note=\"Ashbya gossypii TEF terminator\"\n", + " primer_bind complement(3867..3886)\n", + " /label=\"T7\"\n", + " /note=\"T7 promoter, forward primer\"\n", + " promoter complement(3868..3886)\n", + " /label=\"T7 promoter\"\n", + " /note=\"promoter for bacteriophage T7 RNA polymerase\"\n", + "ORIGIN\n", + " 1 gaggcggttt gcgtattggg cgctcttccg cttcctcgct cactgactcg ctgcgctcgg\n", + " 61 tcgttcggct gcggcgagcg gtatcagctc actcaaaggc ggtaatacgg ttatccacag\n", + " 121 aatcagggga taacgcagga aagaacatgt gagcaaaagg ccagcaaaag gccaggaacc\n", + " 181 gtaaaaaggc cgcgttgctg gcgtttttcc ataggctccg cccccctgac gagcatcaca\n", + " 241 aaaatcgacg ctcaagtcag aggtggcgaa acccgacagg actataaaga taccaggcgt\n", + " 301 ttccccctgg aagctccctc gtgcgctctc ctgttccgac cctgccgctt accggatacc\n", + " 361 tgtccgcctt tctcccttcg 
ggaagcgtgg cgctttctca tagctcacgc tgtaggtatc\n", + " 421 tcagttcggt gtaggtcgtt cgctccaagc tgggctgtgt gcacgaaccc cccgttcagc\n", + " 481 ccgaccgctg cgccttatcc ggtaactatc gtcttgagtc caacccggta agacacgact\n", + " 541 tatcgccact ggcagcagcc actggtaaca ggattagcag agcgaggtat gtaggcggtg\n", + " 601 ctacagagtt cttgaagtgg tggcctaact acggctacac tagaagaaca gtatttggta\n", + " 661 tctgcgctct gctgaagcca gttaccttcg gaaaaagagt tggtagctct tgatccggca\n", + " 721 aacaaaccac cgctggtagc ggtggttttt ttgtttgcaa gcagcagatt acgcgcagaa\n", + " 781 aaaaaggatc tcaagaagat cctttgatct tttctacggg gtctgacgct cagtggaacg\n", + " 841 aaaactcacg ttaagggatt ttggtcatga gattatcaaa aaggatcttc acctagatcc\n", + " 901 ttttaaatta aaaatgaagt tttaaatcaa tctaaagtat atatgagtaa acttggtctg\n", + " 961 acagttacca atgcttaatc agtgaggcac ctatctcagc gatctgtcta tttcgttcat\n", + " 1021 ccatagttgc ctgactcccc gtcgtgtaga taactacgat acgggagggc ttaccatctg\n", + " 1081 gccccagtgc tgcaatgata ccgcgagacc cacgctcacc ggctccagat ttatcagcaa\n", + " 1141 taaaccagcc agccggaagg gccgagcgca gaagtggtcc tgcaacttta tccgcctcca\n", + " 1201 tccagtctat taattgttgc cgggaagcta gagtaagtag ttcgccagtt aatagtttgc\n", + " 1261 gcaacgttgt tgccattgct acaggcatcg tggtgtcacg ctcgtcgttt ggtatggctt\n", + " 1321 cattcagctc cggttcccaa cgatcaaggc gagttacatg atcccccatg ttgtgcaaaa\n", + " 1381 aagcggttag ctccttcggt cctccgatcg ttgtcagaag taagttggcc gcagtgttat\n", + " 1441 cactcatggt tatggcagca ctgcataatt ctcttactgt catgccatcc gtaagatgct\n", + " 1501 tttctgtgac tggtgagtac tcaaccaagt cattctgaga atagtgtatg cggcgaccga\n", + " 1561 gttgctcttg cccggcgtca atacgggata ataccgcgcc acatagcaga actttaaaag\n", + " 1621 tgctcatcat tggaaaacgt tcttcggggc gaaaactctc aaggatctta ccgctgttga\n", + " 1681 gatccagttc gatgtaaccc actcgtgcac ccaactgatc ttcagcatct tttactttca\n", + " 1741 ccagcgtttc tgggtgagca aaaacaggaa ggcaaaatgc cgcaaaaaag ggaataaggg\n", + " 1801 cgacacggaa atgttgaata ctcatactct tcctttttca atattattga agcatttatc\n", + " 1861 agggttattg tctcatgagc ggatacatat ttgaatgtat ttagaaaaat 
aaacaaatag\n", + " 1921 gggttccgcg cacatttccc cgaaaagtgc cacctgacgt ctaagaaacc attattatca\n", + " 1981 tgacattaac ctataaaaat aggcgtatca cgaggccctt tcgtctcgcg cgtttcggtg\n", + " 2041 atgacggtga aaacctctga cacatgcagc tcccggagac ggtcacagct tgtctgtaag\n", + " 2101 cggatgccgg gagcagacaa gcccgtcagg gcgcgtcagc gggtgttggc gggtgtcggg\n", + " 2161 gctggcttaa ctatgcggca tcagagcaga ttgtactgag agtgcaccat atggacatat\n", + " 2221 tgtcgttaga acgcggctac aattaataca taaccttatg tatcatacac atacgattta\n", + " 2281 ggtgacacta tagaacgcgg ccgccagctg aagcttcgta cgctgcaggt cgacggatcc\n", + " 2341 ccgggttaat taaggcgcgc cagatctgtt tagcttgcct cgtccccgcc gggtcacccg\n", + " 2401 gccagcgaca tggaggccca gaataccctc cttgacagtc ttgacgtgcg cagctcaggg\n", + " 2461 gcatgatgtg actgtcgccc gtacatttag cccatacatc cccatgtata atcatttgca\n", + " 2521 tccatacatt ttgatggccg cacggcgcga agcaaaaatt acggctcctc gctgcagacc\n", + " 2581 tgcgagcagg gaaacgctcc cctcacagac gcgttgaatt gtccccacgc cgcgcccctg\n", + " 2641 tagagaaata taaaaggtta ggatttgcca ctgaggttct tctttcatat acttcctttt\n", + " 2701 aaaatcttgc taggatacag ttctcacatc acatccgaac ataaacaacc atgggtaagg\n", + " 2761 aaaagactca cgtttcgagg ccgcgattaa attccaacat ggatgctgat ttatatgggt\n", + " 2821 ataaatgggc tcgcgataat gtcgggcaat caggtgcgac aatctatcga ttgtatggga\n", + " 2881 agcccgatgc gccagagttg tttctgaaac atggcaaagg tagcgttgcc aatgatgtta\n", + " 2941 cagatgagat ggtcagacta aactggctga cggaatttat gcctcttccg accatcaagc\n", + " 3001 attttatccg tactcctgat gatgcatggt tactcaccac tgcgatcccc ggcaaaacag\n", + " 3061 cattccaggt attagaagaa tatcctgatt caggtgaaaa tattgttgat gcgctggcag\n", + " 3121 tgttcctgcg ccggttgcat tcgattcctg tttgtaattg tccttttaac agcgatcgcg\n", + " 3181 tatttcgtct cgctcaggcg caatcacgaa tgaataacgg tttggttgat gcgagtgatt\n", + " 3241 ttgatgacga gcgtaatggc tggcctgttg aacaagtctg gaaagaaatg cataagcttt\n", + " 3301 tgccattctc accggattca gtcgtcactc atggtgattt ctcacttgat aaccttattt\n", + " 3361 ttgacgaggg gaaattaata ggttgtattg atgttggacg agtcggaatc gcagaccgat\n", + " 3421 
accaggatct tgccatccta tggaactgcc tcggtgagtt ttctccttca ttacagaaac\n", + " 3481 ggctttttca aaaatatggt attgataatc ctgatatgaa taaattgcag tttcatttga\n", + " 3541 tgctcgatga gtttttctaa tcagtactga caataaaaag attcttgttt tcaagaactt\n", + " 3601 gtcatttgta tagttttttt atattgtagt tgttctattt taatcaaatg ttagcgtgat\n", + " 3661 ttatattttt tttcgcctcg acatcatctg cccagatgcg aagttaagtg cgcagaaagt\n", + " 3721 aatatcatgc gtcaatcgta tgtgaatgct ggtcgctata ctgctgtcga ttcgatacta\n", + " 3781 acgccgccat ccagtttaaa cgagctcgaa ttcatcgatg atatcagatc cactagtggc\n", + " 3841 ctatgcggcc gcggatctgc cggtctccct atagtgagtc gtattaattt cgataagcca\n", + " 3901 ggttaacctg cattaatgaa tcggccaacg cgcgggga\n", + "//\n", + "LOCUS CU329670 4538 bp DNA linear PLN 26-APR-2024\n", + "DEFINITION Schizosaccharomyces pombe strain 972h- genome assembly, chromosome:\n", + " I.\n", + "ACCESSION CU329670\n", + "VERSION CU329670.1\n", + "DBLINK BioProject: PRJNA13836\n", + " BioSample: SAMEA3138176\n", + "KEYWORDS .\n", + "SOURCE Schizosaccharomyces pombe (fission yeast)\n", + " ORGANISM Schizosaccharomyces pombe\n", + " Eukaryota; Fungi; Dikarya; Ascomycota; Taphrinomycotina;\n", + " Schizosaccharomycetes; Schizosaccharomycetales;\n", + " Schizosaccharomycetaceae; Schizosaccharomyces.\n", + "REFERENCE 1 (bases 1 to 4538)\n", + " AUTHORS Lang,B.F.\n", + " TITLE The mitochondrial genome of the fission yeast Schizosaccharomyces\n", + " pombe: highly homologous introns are inserted at the same position\n", + " of the otherwise less conserved cox1 genes in Schizosaccharomyces\n", + " pombe and Aspergillus nidulans\n", + " JOURNAL EMBO J 3 (9), 2129-2136 (1984)\n", + " PUBMED 6092057\n", + "REFERENCE 2 (bases 1 to 4538)\n", + " AUTHORS Lang,B.F., Ahne,F. and Bonen,L.\n", + " TITLE The mitochondrial genome of the fission yeast Schizosaccharomyces\n", + " pombe. 
The cytochrome b gene has an intron closely related to the\n", + " first two introns in the Saccharomyces cerevisiae cox1 gene\n", + " JOURNAL J Mol Biol 184 (3), 353-366 (1985)\n", + " PUBMED 4046021\n", + "REFERENCE 3 (bases 1 to 4538)\n", + " AUTHORS Lang,B.F., Cedergren,R. and Gray,M.W.\n", + " TITLE The mitochondrial genome of the fission yeast, Schizosaccharomyces\n", + " pombe. Sequence of the large-subunit ribosomal RNA gene, comparison\n", + " of potential secondary structure in fungal mitochondrial\n", + " large-subunit rRNAs and evolutionary considerations\n", + " JOURNAL Eur J Biochem 169 (3), 527-537 (1987)\n", + " PUBMED 2446871\n", + "REFERENCE 4 (bases 1 to 4538)\n", + " AUTHORS Trinkl,H., Lang,B.F. and Wolf,K.\n", + " TITLE Nucleotide sequence of the gene encoding the small ribosomal RNA in\n", + " the mitochondrial genome of the fission yeast Schizosaccharomyces\n", + " pombe\n", + " JOURNAL Nucleic Acids Res 17 (16), 6730 (1989)\n", + " PUBMED 2780299\n", + "REFERENCE 5 (bases 1 to 4538)\n", + " AUTHORS Wood,V., Gwilliam,R., Rajandream,M.A., Lyne,M., Lyne,R., Stewart,A.,\n", + " Sgouros,J., Peat,N., Hayles,J., Baker,S., Basham,D., Bowman,S.,\n", + " Brooks,K., Brown,D., Brown,S., Chillingworth,T., Churcher,C.,\n", + " Collins,M., Connor,R., Cronin,A., Davis,P., Feltwell,T., Fraser,A.,\n", + " Gentles,S., Goble,A., Hamlin,N., Harris,D., Hidalgo,J., Hodgson,G.,\n", + " Holroyd,S., Hornsby,T., Howarth,S., Huckle,E.J., Hunt,S., Jagels,K.,\n", + " James,K., Jones,L., Jones,M., Leather,S., McDonald,S., McLean,J.,\n", + " Mooney,P., Moule,S., Mungall,K., Murphy,L., Niblett,D., Odell,C.,\n", + " Oliver,K., O'Neil,S., Pearson,D., Quail,M.A., Rabbinowitsch,E.,\n", + " Rutherford,K., Rutter,S., Saunders,D., Seeger,K., Sharp,S.,\n", + " Skelton,J., Simmonds,M., Squares,R., Squares,S., Stevens,K.,\n", + " Taylor,K., Taylor,R.G., Tivey,A., Walsh,S., Warren,T., Whitehead,S.,\n", + " Woodward,J., Volckaert,G., Aert,R., Robben,J., Grymonprez,B.,\n", + " 
Weltjens,I., Vanstreels,E., Rieger,M., Schafer,M., Muller-Auer,S.,\n", + " Gabel,C., Fuchs,M., Dusterhoft,A., Fritzc,C., Holzer,E., Moestl,D.,\n", + " Hilbert,H., Borzym,K., Langer,I., Beck,A., Lehrach,H., Reinhardt,R.,\n", + " Pohl,T.M., Eger,P., Zimmermann,W., Wedler,H., Wambutt,R.,\n", + " Purnelle,B., Goffeau,A., Cadieu,E., Dreano,S., Gloux,S., Lelaure,V.,\n", + " Mottier,S., Galibert,F., Aves,S.J., Xiang,Z., Hunt,C., Moore,K.,\n", + " Hurst,S.M., Lucas,M., Rochet,M., Gaillardin,C., Tallada,V.A.,\n", + " Garzon,A., Thode,G., Daga,R.R., Cruzado,L., Jimenez,J., Sanchez,M.,\n", + " del Rey,F., Benito,J., Dominguez,A., Revuelta,J.L., Moreno,S.,\n", + " Armstrong,J., Forsburg,S.L., Cerutti,L., Lowe,T., McCombie,W.R.,\n", + " Paulsen,I., Potashkin,J., Shpakovski,G.V., Ussery,D., Barrell,B.G.\n", + " and Nurse,P.\n", + " TITLE The genome sequence of Schizosaccharomyces pombe\n", + " JOURNAL Nature 415 (6874), 871-880 (2002)\n", + " PUBMED 11859360\n", + " REMARK Erratum:[Nature 2003 Jan 2;421(6918):94. 
Cerrutti L [corrected to\n", + " Cerutti L]]\n", + "REFERENCE 6\n", + " AUTHORS Wood,V., Gwilliam,R., Rajandream,M.A., Lyne,M., Lyne,R., Stewart,A.,\n", + " Sgouros,J., Peat,N., Hayles,J., Baker,S., Basham,D., Bowman,S.,\n", + " Brooks,K., Brown,D., Brown,S., Chillingworth,T., Churcher,C.,\n", + " Collins,M., Connor,R., Cronin,A., Davis,P., Feltwell,T., Fraser,A.,\n", + " Gentles,S., Goble,A., Hamlin,N., Harris,D., Hidalgo,J., Hodgson,G.,\n", + " Holroyd,S., Hornsby,T., Howarth,S., Huckle,E.J., Hunt,S., Jagels,K.,\n", + " James,K., Jones,L., Jones,M., Leather,S., McDonald,S., McLean,J.,\n", + " Mooney,P., Moule,S., Mungall,K., Murphy,L., Niblett,D., Odell,C.,\n", + " Oliver,K., O'Neil,S., Pearson,D., Quail,M.A., Rabbinowitsch,E.,\n", + " Rutherford,K., Rutter,S., Saunders,D., Seeger,K., Sharp,S.,\n", + " Skelton,J., Simmonds,M., Squares,R., Squares,S., Stevens,K.,\n", + " Taylor,K., Taylor,R.G., Tivey,A., Walsh,S., Warren,T., Whitehead,S.,\n", + " Woodward,J., Volckaert,G., Aert,R., Robben,J., Grymonprez,B.,\n", + " Weltjens,I., Vanstreels,E., Rieger,M., Schafer,M., Muller-Auer,S.,\n", + " Gabel,C., Fuchs,M., Dusterhoft,A., Fritzc,C., Holzer,E., Moestl,D.,\n", + " Hilbert,H., Borzym,K., Langer,I., Beck,A., Lehrach,H., Reinhardt,R.,\n", + " Pohl,T.M., Eger,P., Zimmermann,W., Wedler,H., Wambutt,R.,\n", + " Purnelle,B., Goffeau,A., Cadieu,E., Dreano,S., Gloux,S., Lelaure,V.,\n", + " Mottier,S., Galibert,F., Aves,S.J., Xiang,Z., Hunt,C., Moore,K.,\n", + " Hurst,S.M., Lucas,M., Rochet,M., Gaillardin,C., Tallada,V.A.,\n", + " Garzon,A., Thode,G., Daga,R.R., Cruzado,L., Jimenez,J., Sanchez,M.,\n", + " del Rey,F., Benito,J., Dominguez,A., Revuelta,J.L., Moreno,S.,\n", + " Armstrong,J., Forsburg,S.L., Cerutti,L., Lowe,T., McCombie,W.R.,\n", + " Paulsen,I., Potashkin,J., Shpakovski,G.V., Ussery,D., Barrell,B.G.\n", + " and Nurse,P.\n", + " TITLE The genome sequence of Schizosaccharomyces pombe\n", + " JOURNAL Nature 415 (6874), 871-880 (2002)\n", + " PUBMED 11859360\n", + " 
REMARK Erratum:[Nature 2003 Jan 2;421(6918):94. Cerrutti L [corrected to\n", + " Cerutti L]]\n", + "REFERENCE 7 (bases 1 to 4538)\n", + " AUTHORS Schafer,B., Hansen,M. and Lang,B.F.\n", + " TITLE Transcription and RNA-processing in fission yeast mitochondria\n", + " JOURNAL RNA 11 (5), 785-795 (2005)\n", + " PUBMED 15811919\n", + "REFERENCE 8\n", + " AUTHORS Wood,V.\n", + " CONSRTM The Schizosaccharomyces pombe Genome Sequencing Consortium\n", + " TITLE Direct Submission\n", + " JOURNAL Submitted (29-JUN-2007) European Schizosaccharomyces genome\n", + " sequencing project, Sanger Institute, The Wellcome Trust Genome\n", + " Campus, Hinxton, Cambridge CB10 1SA\n", + "REFERENCE 9\n", + " AUTHORS Wood,V. and Rutherford,K.\n", + " CONSRTM PomBase\n", + " TITLE Direct Submission\n", + " JOURNAL Submitted (13-MAR-2024) University of Cambridge, PomBase, Hopkins\n", + " building, Tennis Court Rd, Cambridge, United Kingdom\n", + "COMMENT On or before Jan 26, 2012 this sequence version replaced\n", + " AL672256.4, AL009197.1, AL009227.1, AL021046.4, AL021809.4,\n", + " AL021813.1, AL021817.2, AL031180.3, AL034486.1, AL034565.1,\n", + " AL034583.1, AL035064.1, AL035248.2, AL035254.1, AL035439.1,\n", + " AL096845.1, AL109734.1, AL109738.1, AL109739.1, AL109770.1,\n", + " AL109820.1, AL109831.1, AL109832.1, AL109951.1, AL109988.1,\n", + " AL110469.1, AL110509.2, AL117210.1, AL117212.1, AL117213.1,\n", + " AL117390.1, AL121732.1, AL121741.1, AL121745.2, AL121764.1,\n", + " AL121765.1, AL121770.1, AL122032.1, AL132667.1, AL132675.1,\n", + " AL132714.1, AL132769.1, AL132779.2, AL132798.2, AL132828.1,\n", + " AL132839.1, AL132983.1, AL132984.1, AL133154.2, AL133156.1,\n", + " AL133157.1, AL133225.2, AL133302.1, AL133357.1, AL133359.1,\n", + " AL133360.1, AL133361.1, AL133442.1, AL133498.1, AL133521.1,\n", + " AL133522.1, AL135751.1, AL136078.1, AL136235.1, AL136499.1,\n", + " AL136521.2, AL136538.1, AL137130.1, AL138666.2, AL138854.1,\n", + " AL139315.1, AL157734.1, AL157811.1, 
AL157872.1, AL157917.1,\n", + " AL157993.1, AL157994.1, AL158056.1, AL159180.1, AL159951.1,\n", + " AL162531.1, AL162631.1, AL163031.1, AL163071.1, AL163191.2,\n", + " AL163481.1, AL163529.1, AL353014.1, AL353860.2, AL355012.1,\n", + " AL355013.1, AL355252.1, AL355452.1, AL355632.1, AL355652.1,\n", + " AL355653.1, AL356333.1, AL356335.1, AL357232.1, AL358272.1,\n", + " AL360054.1, AL360094.1, AL390095.1, AL390274.1, AL390814.1,\n", + " AL391713.1, AL391744.1, AL391746.2, AL391783.1, AL441621.1,\n", + " AL441624.1, AL512486.1, AL512487.1, AL512491.1, AL512493.1,\n", + " AL512496.1, AL512549.1, AL512562.1, AL583902.1, AL590562.1,\n", + " AL590582.1, AL590602.1, AL590605.1, AL590902.2, AL590903.1,\n", + " AL691401.1, AL691402.1, AL691405.1, Z49811.1, Z50112.1, Z50113.1,\n", + " Z50142.1, Z50728.2, Z54096.1, Z54142.2, Z54285.2, Z54308.1,\n", + " Z54328.1, Z54354.1, Z54366.1, Z56276.2, Z64354.1, Z66568.2,\n", + " Z67757.1, Z67961.2, Z67998.1, Z67999.1, Z68136.2, Z68144.1,\n", + " Z68166.1, Z68197.2, Z68198.1, Z68887.1, Z69086.1, Z69239.1,\n", + " Z69240.1, Z69368.1, Z69369.1, Z69380.1, Z69725.1, Z69726.1,\n", + " Z69727.1, Z69728.1, Z69729.1, Z69730.1, Z69731.1, Z69795.1,\n", + " Z69796.1, Z69944.1, Z70043.1, Z70690.1, Z70691.1, Z70721.1,\n", + " Z73099.2, Z73100.2, Z81312.1, Z81317.1, Z94864.1, Z95334.1,\n", + " Z95395.1, Z95396.2, Z97185.1, Z97208.1, Z97209.1, Z97210.2,\n", + " Z98056.2, Z98529.1, Z98530.2, Z98531.2, Z98532.1, Z98533.1,\n", + " Z98559.1, Z98560.1, Z98595.1, Z98596.1, Z98597.1, Z98598.1,\n", + " Z98600.1, Z98601.1, Z98602.1, Z98603.1, Z98762.1, Z98763.1,\n", + " Z98849.1, Z98944.1, Z98974.2, Z98975.1, Z98977.4, Z98978.1,\n", + " Z98979.1, Z98980.1, Z98981.3, Z99091.2, Z99126.1, Z99161.1,\n", + " Z99162.1, Z99163.2, Z99164.2, Z99165.1, Z99166.1, Z99167.1,\n", + " Z99168.1, Z99258.1, Z99259.1, Z99260.2, Z99261.1, Z99262.1,\n", + " Z99292.1, Z99295.1, Z99296.2, Z99531.1, Z99532.2, Z99568.2,\n", + " Z99753.1.\n", + "FEATURES Location/Qualifiers\n", + " 
source 1..4538\n", + " /organism=\"Schizosaccharomyces pombe\"\n", + " /mol_type=\"genomic DNA\"\n", + " /strain=\"972h-\"\n", + " /db_xref=\"taxon:4896\"\n", + " /chromosome=\"I\"\n", + " gene <1..676\n", + " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", + " CDS <1..393\n", + " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", + " /codon_start=1\n", + " /product=\"conserved fungal protein\"\n", + " /protein_id=\"CAC21481.2\"\n", + " /translation=\"MMTRMELRPLEIGFSKALTEVAPVTCQCECWDHNLCSSQASEMDL\n", + " IYQSQDTHSCASKQDAVFQLLSETKIPVPNRYRKISHRLSTLSNKKTLKSQLDRFLSSS\n", + " KKLHNDDVNRGDYCFLLSTPVECSASTNSHSYDCLWNFSCNSFPEYSSYSASETSSVAS\n", + " YSYYSGPNPATPSSSSCNLVNANSLDIYLNINNLKKSKSVPRLRGQFMEPVEHNHPLSK\n", + " SLEEQSSFLEQSKDASSNLTACNRSGSSLSSNFYSSRLSKKTSLASLNKSRASLQHKIM\n", + " SLSRNIIRRVFHKPEVHLDPSASILNLSSSHGESNLTNGLLCQNFKLFQDDWLMEDCAP\n", + " DANFTLYTPLQPWEKRSVKPEIRRPRLNPNFFRVFVLEAQMRRAGKLSANTAGRAQLIY\n", + " LPKPAVTFSTSPLHVEL\"\n", + " gene complement(<1..1972)\n", + " /locus_tag=\"SPOM_SPNCRNA.2846\"\n", + " ncRNA complement(<1..1972)\n", + " /ncRNA_class=\"lncRNA\"\n", + " /locus_tag=\"SPOM_SPNCRNA.2846\"\n", + " /product=\"non-coding RNA\"\n", + " 3'UTR 394..676\n", + " /locus_tag=\"SPOM_SPAPB1A10.08\"\n", + " gene 1001..3538\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " 5'UTR 1001..1173\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " CDS join(1174..1597,1645..3416)\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " /codon_start=1\n", + " /product=\"antiparallel microtubule cross-linking factor\n", + " Ase1\"\n", + " /protein_id=\"CAC21482.1\"\n", + " /translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", + " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", + " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", + " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", + " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", + " 
NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", + " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", + " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", + " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", + " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", + " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", + " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", + " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", + " 3'UTR 3417..3538\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " gene complement(3510..>4538)\n", + " /gene=\"ypt71\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", + " 3'UTR complement(3510..3690)\n", + " /gene=\"ypt71\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", + " CDS complement(join(3691..4137,4192..>4290))\n", + " /gene=\"ypt71\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.10C\"\n", + " /codon_start=1\n", + " /product=\"GTPase Ypt71\"\n", + " /protein_id=\"CAC21483.1\"\n", + " /translation=\"MSAQKRVFLKVVILGDSGVGKTCLMNQFVNQKFSREYKATIGADF\n", + " LTKDVVVDDKLVTLQLWDTAGQERFQSLGMAFYRGADCCVIVYNVNNSKSFDSVENWRQ\n", + " EFLYQTSQDECAFPFIIVGNQIDKDASKRAVSLHRALDYCKSKHGSNMIHFEASAKENT\n", + " NVTDLFETVSRLALENESSRDDFVNDFSEPLLLSKPLNNTSSCNC\"\n", + " gene 4049..>4538\n", + " /locus_tag=\"SPOM_SPNCRNA.2847\"\n", + " ncRNA 4049..>4538\n", + " /ncRNA_class=\"lncRNA\"\n", + " /locus_tag=\"SPOM_SPNCRNA.2847\"\n", + " /product=\"non-coding RNA\"\n", + "ORIGIN\n", + " 1 atcatcagac gtgtatttca caagccagaa gtgcatttgg atccaagtgc ctccatttta\n", + " 61 aatctctcat cttcgcatgg cgaaagcaac ctgacaaatg gtttgctttg tcaaaatttc\n", + " 121 aagctttttc aggatgattg gttgatggag gattgtgcgc cagatgccaa tttcactttg\n", + " 181 tacaccccgc ttcaaccctg ggaaaagcga agtgtgaaac ctgaaatcag acgtcctcga\n", + " 241 ttaaatccta attttttccg agtatttgtt ttagaagctc aaatgcgacg agctggaaag\n", + " 301 ctatcagcaa acactgctgg ccgagcccag ttaatttacc tcccaaagcc tgccgttacc\n", + " 361 ttctccacta gccctttgca 
tgttgaattg taaaaattta acgcatgact tatatacatt\n", + " 421 tgcattcttc caagctggtt atatttattt tcattttttt ctcacccaat acttttttat\n", + " 481 ccctactgtc tttatggaca atcgactcac aattgtttct ttttgttgta tatgattttt\n", + " 541 tttttaaagg aaatgggttt cgcgatactg ggttgaatcc caattgcggt taatattaca\n", + " 601 taaaataatt ctcccatagt cctagatcct gtctttgaat atgagcaaat aaaagaattg\n", + " 661 aacaaatcat gaatgctttt ctctcttaga tgatattttg tatgcataag tctaattata\n", + " 721 ttgattacga taagacttaa aaagtaagcc tttgtatcct tttaagcagt atttgaattt\n", + " 781 tcttgtatca tattttaggt agagcaaaag ataccagttt gtagaacttt atgtgcttcc\n", + " 841 ttacattggt atatttcagg cacataaata ttcttcaact tacaattcta agtattttgt\n", + " 901 ttatactaaa aggagctgaa taacgtttat acagtgctga cattgaaatc tatttgcttt\n", + " 961 ctttggaata taagcgcatg ctgagttact ttcgcaggcc aagccatatc caaccaccat\n", + " 1021 ttttgtgcca agcttttatg caaggttaat tccttgtact gcttgttatg ttataatata\n", + " 1081 tcaacatctt aacagttttc atatcttcct ttatattcta ttaattgaat ttcaaacatc\n", + " 1141 gttttattga gctcatttac atcaaccggt tcaatgcaaa cagtaatgat ggatgacatt\n", + " 1201 caaagcactg attctattgc tgaaaaagat aatcactcta ataatgaatc taactttact\n", + " 1261 tggaaagcgt ttcgtgaaca agtggaaaag catttttcta aaattgaaag gcttcaccaa\n", + " 1321 gtccttggaa cagatggaga caattcatca ttatttgagt tgtttacaac ggcaatgaat\n", + " 1381 gcccagcttc atgaaatgga acagtgccag aaaaaacttg aagatgactg tcagcaaaga\n", + " 1441 attgattcaa tcagattttt ggtttcctca ttaaagttaa cggatgatac ttctagtctc\n", + " 1501 aaaattgagt ctcctttaat tcagtgtttg aatcgtttgt caatggtaga aggacaatat\n", + " 1561 atggcacagt atgatcaaaa gttaagtacg attaaaggta tgtaatcgtc tttaatttag\n", + " 1621 acttgtgttt taactgatgt atagaaatgt atcacaaatt ggagtcatat tgtaaccgct\n", + " 1681 taggaagtcc gttcgtttta cctgattttg agaattcatt tttatctgat gtatccgatg\n", + " 1741 cttttactga atctttgaga ggacgcatca acgaagccga aaaggagatt gatgcgagat\n", + " 1801 tagaggttat taattccttt gaagaagaaa ttttgggttt gtggtctgaa ctcggtgttg\n", + " 1861 agcccgctga tgttccacaa tacgaacaat tgcttgaatc ccatactaat 
cgaccaaatg\n", + " 1921 atgtttatgt tactcaagaa cttatcgacc aactttgcaa gcaaaaagaa gttttttccg\n", + " 1981 ctgaaaaaga aaagagaagt gatcatttaa aaagtataca atcagaagtt agcaacttgt\n", + " 2041 ggaataagct tcaagtttct cccaatgaac aaagtcaatt tggcgattca tcaaacatta\n", + " 2101 atcaagaaaa tatttcatta tgggaaactg aacttgaaaa acttcatcag ttaaaaaagg\n", + " 2161 agcatttacc cattttttta gaagactgtc gtcaacaaat tcttcagctt tgggattctc\n", + " 2221 tgttttattc agaagaacaa agaaagtcct ttacacctat gtatgaagac attattacag\n", + " 2281 agcaggttct tacggcccat gaaaactata taaagcaact agaggccgaa gtttctgcta\n", + " 2341 ataagtcctt tttaagctta attaatcgct atgcctcttt aatagaagga aagaaagagc\n", + " 2401 ttgaagctag ttctaatgat gcctctcgtc taacacaacg gggacgccgg gacccaggtt\n", + " 2461 tacttctacg tgaagagaaa atccgtaagc gactttctag agaacttcct aaggttcagt\n", + " 2521 cgctgcttat accagagatt acagcatggg aagaaagaaa tggaaggacg ttcctttttt\n", + " 2581 atgatgaacc acttctcaag atttgccaag aggccactca accaaaatca ttatatagaa\n", + " 2641 gtgcaagtgc tgccgcaaac cgcccgaaaa cagcaactac aacggactct gttaatagaa\n", + " 2701 caccttctca acgagggcgt gtagctgtac cttcaacacc aagtgttagg tccgcttctc\n", + " 2761 gagctatgac gagtccaagg acaccgcttc ctagagtaaa aaacactcaa aatccaagtc\n", + " 2821 gttccattag tgcagaaccg ccatcagcaa ccagtaccgc caatagaaga caccccactg\n", + " 2881 ctaatcgaat tgatataaac gctagattaa acagtgctag tcggtctcga agcgcgaaca\n", + " 2941 tgataagaca aggggcaaat ggtagtgaca gcaatatgtc ttcttcaccc gtttctggaa\n", + " 3001 attccaatac cccttttaac aagtttccaa attctgtatc tcgcaataca cattttgaat\n", + " 3061 ccaagtcacc gcacccaaat tactctcgaa ctcctcatga aacgtattca aaggcttcat\n", + " 3121 ctaagaacgt cccattaagt cctccaaagc agcgtgtagt taatgaacac gctttaaata\n", + " 3181 ttatgtcgga aaaattgcaa agaactaatc tgaaagaaca aacacccgag atggacattg\n", + " 3241 aaaacagctc gcagaacctt cctttttctc ctatgaagat atcccccata agagcatcac\n", + " 3301 ccgtaaagac aattccatca tcaccgtccc ccactaccaa cattttttct gctccactca\n", + " 3361 acaatattac aaattgtaca ccgatggagg atgaatgggg agaagaaggc ttttaagctt\n", + " 3421 
cttatttacc taatcgatca aatttaaata tacatatttt tgcatatgaa tacagcatat\n", + " 3481 agataattca taaaagttta ttaactgagg tcataattaa aagactattt acacctaaaa\n", + " 3541 aaaaacgtgt atcaatagag ggaaaagaga agaattaaga acagaaagta accatagttt\n", + " 3601 tgttaaaata gcaatgtaaa aaaatattat gaaaagaaaa cgtatagcac attttgaaat\n", + " 3661 gtaaaagaat ctgagagagc gtgtgaatat ctagcaatta caagaagatg tattattcaa\n", + " 3721 aggctttgaa agaagcaaag gttcagagaa gtcattaaca aagtcatctc tcgagctttc\n", + " 3781 attttctaaa gctaaacgac tgactgtttc gaaaaggtca gtaacgtttg tattttcttt\n", + " 3841 tgcactagct tcaaaatgaa tcatatttga tccatgtttg gatttgcaat agtcaagagc\n", + " 3901 tcgatgaaga gatacggctc gtttagacgc gtctttgtcg atttgatttc caacgataat\n", + " 3961 gaaagggaat gcacattcat cttgtgaagt ttgatataaa aattcttgcc tccagttttc\n", + " 4021 tactgagtca aaagacttcg agttattcac attataaaca attacacaac aatcggcccc\n", + " 4081 tctgtaaaaa gccattccca ggctttgaaa tcgttcttga ccagcagtat cccaaagctg\n", + " 4141 tttataatta gcaaacgaat ttagatgggc ggaacttata ttggaactta cctgtaatgt\n", + " 4201 gaccaatttg tcgtcaacca caacgtcctt ggttaaaaaa tcagcaccga tggtagcttt\n", + " 4261 atattcgcga ctaaactttt gattgacgaa ctaaaatgac gatgttaaca aattgccaaa\n", + " 4321 gcaatactca tagagaagct gatgtaaaga tcgttaacca tatttgagct agtatttaat\n", + " 4381 aacaaagtga ataaatttta aaagcaatca ccttgtagcg acaaataaca acttatcgac\n", + " 4441 ataaaatcaa tgggaaattg cagtattgga ttttacagct caatacaaaa accaaaaaga\n", + " 4501 aaaatatact gaacgtataa aatttaacgc ttcaattg\n", + "//\n" + ] + } + ], + "source": [ + "# Parsing the files\n", + "pFA6akanMX6_path = \"./pFA6a-kanMX6.gb\"\n", + "ase1_path = \"./CU329670.gb\"\n", + "vector = parse(pFA6akanMX6_path)[0]\n", + "pombe_chromosome_I = parse(ase1_path)[0]\n", + "\n", + "# Printing the parsed files\n", + "\n", + "print(vector.format(\"gb\"))\n", + "print(pombe_chromosome_I.format(\"gb\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"ACCATGTCGACATGCAAACAGTAATGATGGA , Tm: 57.24061148156318\n", + "GGCGCGCCATTAAAAGCCTTCTTCTCCC , Tm: 56.64459495003314\n" + ] + } + ], + "source": [ + "from pydna.design import primer_design\n", + "#Finding the feature containing the CDS with ase1 as a type qualifier\n", + "gene = next(f for f in pombe_chromosome_I.features if f.type == \"CDS\" and\n", + " \"gene\" in f.qualifiers and\n", + " \"ase1\" in f.qualifiers[\"gene\"])\n", + "\n", + "# Using the primer_design function to design primers to amplify the CDS\n", + "# `min` and `max` can be used on a SeqFeature to get the start (leftmost) and end (rightmost) positions\n", + "# this works both on feature with SimpleLocation and CompoundLocation\n", + "amplicon = primer_design(pombe_chromosome_I[min(gene):max(gene)], target_tm=55)\n", + "\n", + "fwd_align, rvs_align = amplicon.primers()\n", + "fwd_primer_ase1 = Dseqrecord(\"ACCATGTCGAC\") + fwd_align # Adding a SalI cut site\n", + "rvs_primer_ase1 = Dseqrecord(\"GGCGCGCCAT\") + rvs_align # Adding a AscI cut site\n", + "\n", + "# Printing out the primers\n", + "\n", + "print(fwd_primer_ase1.seq, ', Tm: ', tm_default(fwd_align))\n", + "print(rvs_primer_ase1.seq, ', Tm: ', tm_default(rvs_align))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS 2263bp_PCR_prod 2263 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_description_description.\n", + "ACCESSION 2263bp\n", + "VERSION 2263bp\n", + "DBLINK BioProject: PRJNA13836\n", + " BioSample: SAMEA3138176\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " CDS join(12..435,483..2254)\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " /codon_start=1\n", + " /product=\"antiparallel microtubule cross-linking factor\n", + " Ase1\"\n", + " /protein_id=\"CAC21482.1\"\n", + " 
/translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", + " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", + " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", + " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", + " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", + " NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", + " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", + " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", + " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", + " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", + " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", + " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", + " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", + " primer_bind 12..31\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer\n", + " sequence:ACCATGTCGACATGCAAACAGTAATGATGGA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(2236..2254)\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer\n", + " sequence:GGCGCGCCATTAAAAGCCTTCTTCTCCC\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 accatgtcga catgcaaaca gtaatgatgg atgacattca aagcactgat tctattgctg\n", + " 61 aaaaagataa tcactctaat aatgaatcta actttacttg gaaagcgttt cgtgaacaag\n", + " 121 tggaaaagca tttttctaaa attgaaaggc ttcaccaagt ccttggaaca gatggagaca\n", + " 181 attcatcatt atttgagttg tttacaacgg caatgaatgc ccagcttcat gaaatggaac\n", + " 241 agtgccagaa aaaacttgaa gatgactgtc agcaaagaat tgattcaatc agatttttgg\n", + " 301 tttcctcatt aaagttaacg gatgatactt ctagtctcaa aattgagtct cctttaattc\n", + " 361 agtgtttgaa tcgtttgtca atggtagaag gacaatatat ggcacagtat gatcaaaagt\n", + " 421 taagtacgat taaaggtatg taatcgtctt taatttagac ttgtgtttta actgatgtat\n", + " 481 agaaatgtat cacaaattgg agtcatattg taaccgctta ggaagtccgt 
tcgttttacc\n", + " 541 tgattttgag aattcatttt tatctgatgt atccgatgct tttactgaat ctttgagagg\n", + " 601 acgcatcaac gaagccgaaa aggagattga tgcgagatta gaggttatta attcctttga\n", + " 661 agaagaaatt ttgggtttgt ggtctgaact cggtgttgag cccgctgatg ttccacaata\n", + " 721 cgaacaattg cttgaatccc atactaatcg accaaatgat gtttatgtta ctcaagaact\n", + " 781 tatcgaccaa ctttgcaagc aaaaagaagt tttttccgct gaaaaagaaa agagaagtga\n", + " 841 tcatttaaaa agtatacaat cagaagttag caacttgtgg aataagcttc aagtttctcc\n", + " 901 caatgaacaa agtcaatttg gcgattcatc aaacattaat caagaaaata tttcattatg\n", + " 961 ggaaactgaa cttgaaaaac ttcatcagtt aaaaaaggag catttaccca tttttttaga\n", + " 1021 agactgtcgt caacaaattc ttcagctttg ggattctctg ttttattcag aagaacaaag\n", + " 1081 aaagtccttt acacctatgt atgaagacat tattacagag caggttctta cggcccatga\n", + " 1141 aaactatata aagcaactag aggccgaagt ttctgctaat aagtcctttt taagcttaat\n", + " 1201 taatcgctat gcctctttaa tagaaggaaa gaaagagctt gaagctagtt ctaatgatgc\n", + " 1261 ctctcgtcta acacaacggg gacgccggga cccaggttta cttctacgtg aagagaaaat\n", + " 1321 ccgtaagcga ctttctagag aacttcctaa ggttcagtcg ctgcttatac cagagattac\n", + " 1381 agcatgggaa gaaagaaatg gaaggacgtt ccttttttat gatgaaccac ttctcaagat\n", + " 1441 ttgccaagag gccactcaac caaaatcatt atatagaagt gcaagtgctg ccgcaaaccg\n", + " 1501 cccgaaaaca gcaactacaa cggactctgt taatagaaca ccttctcaac gagggcgtgt\n", + " 1561 agctgtacct tcaacaccaa gtgttaggtc cgcttctcga gctatgacga gtccaaggac\n", + " 1621 accgcttcct agagtaaaaa acactcaaaa tccaagtcgt tccattagtg cagaaccgcc\n", + " 1681 atcagcaacc agtaccgcca atagaagaca ccccactgct aatcgaattg atataaacgc\n", + " 1741 tagattaaac agtgctagtc ggtctcgaag cgcgaacatg ataagacaag gggcaaatgg\n", + " 1801 tagtgacagc aatatgtctt cttcacccgt ttctggaaat tccaataccc cttttaacaa\n", + " 1861 gtttccaaat tctgtatctc gcaatacaca ttttgaatcc aagtcaccgc acccaaatta\n", + " 1921 ctctcgaact cctcatgaaa cgtattcaaa ggcttcatct aagaacgtcc cattaagtcc\n", + " 1981 tccaaagcag cgtgtagtta atgaacacgc tttaaatatt atgtcggaaa aattgcaaag\n", + " 2041 
aactaatctg aaagaacaaa cacccgagat ggacattgaa aacagctcgc agaaccttcc\n", + " 2101 tttttctcct atgaagatat cccccataag agcatcaccc gtaaagacaa ttccatcatc\n", + " 2161 accgtccccc actaccaaca ttttttctgc tccactcaac aatattacaa attgtacacc\n", + " 2221 gatggaggat gaatggggag aagaaggctt ttaatggcgc gcc\n", + "//\n" + ] + } + ], + "source": [ + "# Performing a PCR to check that the primers are specific. An error message is returned if otherwise.\n", + "\n", + "pcr_product = pcr(fwd_primer_ase1, rvs_primer_ase1, pombe_chromosome_I)\n", + "\n", + "# Printing out the PCR results\n", + "\n", + "print(pcr_product.format(\"gb\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(Dseqrecord(-30), Dseqrecord(-3916))\n", + "(Dseqrecord(-10), Dseqrecord(-2255), Dseqrecord(-6))\n" + ] + } + ], + "source": [ + "# Cleaving the cloning vector with restriction enzymes\n", + "\n", + "plasmid_digests = vector.cut(SalI, AscI)\n", + "\n", + "# Cleaving the gene fragment with restriction enzymes\n", + "\n", + "gene_digests = Dseqrecord(pcr_product).cut(SalI, AscI)\n", + "\n", + "# Printing out the digests\n", + "print(plasmid_digests) \n", + "print(gene_digests)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS name 6163 bp DNA circular UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 52..1408\n", + " /label=\"kanMX\"\n", + " /note=\"yeast selectable marker conferring kanamycin\n", + " resistance (Wach et al., 1994)\"\n", + " promoter 52..395\n", + " /label=\"TEF promoter\"\n", + " /note=\"Ashbya gossypii TEF promoter\"\n", + " CDS 396..1205\n", + " /codon_start=1\n", + " /gene=\"aph(3')-Ia\"\n", + " /product=\"aminoglycoside 
phosphotransferase\"\n", + " /label=\"KanR\"\n", + " /note=\"confers resistance to kanamycin\"\n", + " /translation=\"MGKEKTHVSRPRLNSNMDADLYGYKWARDNVGQSGATIYRLYGKP\n", + " DAPELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTAIPGKTA\n", + " FQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLAQAQSRMNNGLVDASD\n", + " FDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFSLDNLIFDEGKLIGCIDVGRVGIAD\n", + " RYQDLAILWNCLGEFSPSLQKRLFQKYGIDNPDMNKLQFHLMLDEFF\"\n", + " primer_bind complement(463..482)\n", + " /label=\"Kan-R\"\n", + " /note=\"Kanamycin resistance gene, reverse primer\"\n", + " terminator 1211..1408\n", + " /label=\"TEF terminator\"\n", + " /note=\"Ashbya gossypii TEF terminator\"\n", + " primer_bind complement(1512..1531)\n", + " /label=\"T7\"\n", + " /note=\"T7 promoter, forward primer\"\n", + " promoter complement(1513..1531)\n", + " /label=\"T7 promoter\"\n", + " /note=\"promoter for bacteriophage T7 RNA polymerase\"\n", + " primer_bind complement(1618..1635)\n", + " /label=\"L4440\"\n", + " /note=\"L4440 vector, forward primer\"\n", + " rep_origin complement(1789..2377)\n", + " /direction=LEFT\n", + " /label=\"ori\"\n", + " /note=\"high-copy-number ColE1/pMB1/pBR322/pUC origin of\n", + " replication\"\n", + " primer_bind complement(1869..1888)\n", + " /label=\"pBR322ori-F\"\n", + " /note=\"pBR322 origin, forward primer\"\n", + " CDS complement(2548..3408)\n", + " /codon_start=1\n", + " /gene=\"bla\"\n", + " /product=\"beta-lactamase\"\n", + " /label=\"AmpR\"\n", + " /note=\"confers resistance to ampicillin, carbenicillin, and\n", + " related antibiotics\"\n", + " /translation=\"MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI\n", + " ELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS\n", + " PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRW\n", + " EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSA\n", + " LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS\n", + " LIKHW\"\n", + " primer_bind 3171..3190\n", + " /label=\"Amp-R\"\n", + " 
/note=\"Ampicillin resistance gene, reverse primer\"\n", + " promoter complement(3409..3513)\n", + " /gene=\"bla\"\n", + " /label=\"AmpR promoter\"\n", + " primer_bind 3581..3599\n", + " /label=\"pBRforEco\"\n", + " /note=\"pBR322 vectors, upsteam of EcoRI site, forward\n", + " primer\"\n", + " primer_bind complement(3637..3659)\n", + " /label=\"pGEX 3'\"\n", + " /note=\"pGEX vectors, reverse primer\"\n", + " primer_bind 3759..3778\n", + " /label=\"pRS-marker\"\n", + " /note=\"pRS vectors, use to sequence yeast selectable\n", + " marker\"\n", + " promoter 3859..3877\n", + " /label=\"SP6 promoter\"\n", + " /note=\"promoter for bacteriophage SP6 RNA polymerase\"\n", + " primer_bind 3859..3876\n", + " /label=\"SP6\"\n", + " /note=\"SP6 promoter, forward primer\"\n", + " CDS join(3918..4341,4389..6160)\n", + " /gene=\"ase1\"\n", + " /locus_tag=\"SPOM_SPAPB1A10.09\"\n", + " /codon_start=1\n", + " /product=\"antiparallel microtubule cross-linking factor\n", + " Ase1\"\n", + " /protein_id=\"CAC21482.1\"\n", + " /translation=\"MQTVMMDDIQSTDSIAEKDNHSNNESNFTWKAFREQVEKHFSKIE\n", + " RLHQVLGTDGDNSSLFELFTTAMNAQLHEMEQCQKKLEDDCQQRIDSIRFLVSSLKLTD\n", + " DTSSLKIESPLIQCLNRLSMVEGQYMAQYDQKLSTIKEMYHKLESYCNRLGSPFVLPDF\n", + " ENSFLSDVSDAFTESLRGRINEAEKEIDARLEVINSFEEEILGLWSELGVEPADVPQYE\n", + " QLLESHTNRPNDVYVTQELIDQLCKQKEVFSAEKEKRSDHLKSIQSEVSNLWNKLQVSP\n", + " NEQSQFGDSSNINQENISLWETELEKLHQLKKEHLPIFLEDCRQQILQLWDSLFYSEEQ\n", + " RKSFTPMYEDIITEQVLTAHENYIKQLEAEVSANKSFLSLINRYASLIEGKKELEASSN\n", + " DASRLTQRGRRDPGLLLREEKIRKRLSRELPKVQSLLIPEITAWEERNGRTFLFYDEPL\n", + " LKICQEATQPKSLYRSASAAANRPKTATTTDSVNRTPSQRGRVAVPSTPSVRSASRAMT\n", + " SPRTPLPRVKNTQNPSRSISAEPPSATSTANRRHPTANRIDINARLNSASRSRSANMIR\n", + " QGANGSDSNMSSSPVSGNSNTPFNKFPNSVSRNTHFESKSPHPNYSRTPHETYSKASSK\n", + " NVPLSPPKQRVVNEHALNIMSEKLQRTNLKEQTPEMDIENSSQNLPFSPMKISPIRASP\n", + " VKTIPSSPSPTTNIFSAPLNNITNCTPMEDEWGEEGF\"\n", + " primer_bind 3918..3937\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer\n", + " 
sequence:ACCATGTCGACATGCAAACAGTAATGATGGA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(6142..6160)\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer\n", + " sequence:GGCGCGCCATTAAAAGCCTTCTTCTCCC\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 cgcgccagat ctgtttagct tgcctcgtcc ccgccgggtc acccggccag cgacatggag\n", + " 61 gcccagaata ccctccttga cagtcttgac gtgcgcagct caggggcatg atgtgactgt\n", + " 121 cgcccgtaca tttagcccat acatccccat gtataatcat ttgcatccat acattttgat\n", + " 181 ggccgcacgg cgcgaagcaa aaattacggc tcctcgctgc agacctgcga gcagggaaac\n", + " 241 gctcccctca cagacgcgtt gaattgtccc cacgccgcgc ccctgtagag aaatataaaa\n", + " 301 ggttaggatt tgccactgag gttcttcttt catatacttc cttttaaaat cttgctagga\n", + " 361 tacagttctc acatcacatc cgaacataaa caaccatggg taaggaaaag actcacgttt\n", + " 421 cgaggccgcg attaaattcc aacatggatg ctgatttata tgggtataaa tgggctcgcg\n", + " 481 ataatgtcgg gcaatcaggt gcgacaatct atcgattgta tgggaagccc gatgcgccag\n", + " 541 agttgtttct gaaacatggc aaaggtagcg ttgccaatga tgttacagat gagatggtca\n", + " 601 gactaaactg gctgacggaa tttatgcctc ttccgaccat caagcatttt atccgtactc\n", + " 661 ctgatgatgc atggttactc accactgcga tccccggcaa aacagcattc caggtattag\n", + " 721 aagaatatcc tgattcaggt gaaaatattg ttgatgcgct ggcagtgttc ctgcgccggt\n", + " 781 tgcattcgat tcctgtttgt aattgtcctt ttaacagcga tcgcgtattt cgtctcgctc\n", + " 841 aggcgcaatc acgaatgaat aacggtttgg ttgatgcgag tgattttgat gacgagcgta\n", + " 901 atggctggcc tgttgaacaa gtctggaaag aaatgcataa gcttttgcca ttctcaccgg\n", + " 961 attcagtcgt cactcatggt gatttctcac ttgataacct tatttttgac gaggggaaat\n", + " 1021 taataggttg tattgatgtt ggacgagtcg gaatcgcaga ccgataccag gatcttgcca\n", + " 1081 tcctatggaa ctgcctcggt gagttttctc cttcattaca gaaacggctt tttcaaaaat\n", + " 1141 atggtattga taatcctgat atgaataaat tgcagtttca tttgatgctc gatgagtttt\n", + " 1201 tctaatcagt actgacaata aaaagattct tgttttcaag aacttgtcat ttgtatagtt\n", 
+ " 1261 tttttatatt gtagttgttc tattttaatc aaatgttagc gtgatttata ttttttttcg\n", + " 1321 cctcgacatc atctgcccag atgcgaagtt aagtgcgcag aaagtaatat catgcgtcaa\n", + " 1381 tcgtatgtga atgctggtcg ctatactgct gtcgattcga tactaacgcc gccatccagt\n", + " 1441 ttaaacgagc tcgaattcat cgatgatatc agatccacta gtggcctatg cggccgcgga\n", + " 1501 tctgccggtc tccctatagt gagtcgtatt aatttcgata agccaggtta acctgcatta\n", + " 1561 atgaatcggc caacgcgcgg ggagaggcgg tttgcgtatt gggcgctctt ccgcttcctc\n", + " 1621 gctcactgac tcgctgcgct cggtcgttcg gctgcggcga gcggtatcag ctcactcaaa\n", + " 1681 ggcggtaata cggttatcca cagaatcagg ggataacgca ggaaagaaca tgtgagcaaa\n", + " 1741 aggccagcaa aaggccagga accgtaaaaa ggccgcgttg ctggcgtttt tccataggct\n", + " 1801 ccgcccccct gacgagcatc acaaaaatcg acgctcaagt cagaggtggc gaaacccgac\n", + " 1861 aggactataa agataccagg cgtttccccc tggaagctcc ctcgtgcgct ctcctgttcc\n", + " 1921 gaccctgccg cttaccggat acctgtccgc ctttctccct tcgggaagcg tggcgctttc\n", + " 1981 tcatagctca cgctgtaggt atctcagttc ggtgtaggtc gttcgctcca agctgggctg\n", + " 2041 tgtgcacgaa ccccccgttc agcccgaccg ctgcgcctta tccggtaact atcgtcttga\n", + " 2101 gtccaacccg gtaagacacg acttatcgcc actggcagca gccactggta acaggattag\n", + " 2161 cagagcgagg tatgtaggcg gtgctacaga gttcttgaag tggtggccta actacggcta\n", + " 2221 cactagaaga acagtatttg gtatctgcgc tctgctgaag ccagttacct tcggaaaaag\n", + " 2281 agttggtagc tcttgatccg gcaaacaaac caccgctggt agcggtggtt tttttgtttg\n", + " 2341 caagcagcag attacgcgca gaaaaaaagg atctcaagaa gatcctttga tcttttctac\n", + " 2401 ggggtctgac gctcagtgga acgaaaactc acgttaaggg attttggtca tgagattatc\n", + " 2461 aaaaaggatc ttcacctaga tccttttaaa ttaaaaatga agttttaaat caatctaaag\n", + " 2521 tatatatgag taaacttggt ctgacagtta ccaatgctta atcagtgagg cacctatctc\n", + " 2581 agcgatctgt ctatttcgtt catccatagt tgcctgactc cccgtcgtgt agataactac\n", + " 2641 gatacgggag ggcttaccat ctggccccag tgctgcaatg ataccgcgag acccacgctc\n", + " 2701 accggctcca gatttatcag caataaacca gccagccgga agggccgagc gcagaagtgg\n", + " 2761 tcctgcaact 
ttatccgcct ccatccagtc tattaattgt tgccgggaag ctagagtaag\n", + " 2821 tagttcgcca gttaatagtt tgcgcaacgt tgttgccatt gctacaggca tcgtggtgtc\n", + " 2881 acgctcgtcg tttggtatgg cttcattcag ctccggttcc caacgatcaa ggcgagttac\n", + " 2941 atgatccccc atgttgtgca aaaaagcggt tagctccttc ggtcctccga tcgttgtcag\n", + " 3001 aagtaagttg gccgcagtgt tatcactcat ggttatggca gcactgcata attctcttac\n", + " 3061 tgtcatgcca tccgtaagat gcttttctgt gactggtgag tactcaacca agtcattctg\n", + " 3121 agaatagtgt atgcggcgac cgagttgctc ttgcccggcg tcaatacggg ataataccgc\n", + " 3181 gccacatagc agaactttaa aagtgctcat cattggaaaa cgttcttcgg ggcgaaaact\n", + " 3241 ctcaaggatc ttaccgctgt tgagatccag ttcgatgtaa cccactcgtg cacccaactg\n", + " 3301 atcttcagca tcttttactt tcaccagcgt ttctgggtga gcaaaaacag gaaggcaaaa\n", + " 3361 tgccgcaaaa aagggaataa gggcgacacg gaaatgttga atactcatac tcttcctttt\n", + " 3421 tcaatattat tgaagcattt atcagggtta ttgtctcatg agcggataca tatttgaatg\n", + " 3481 tatttagaaa aataaacaaa taggggttcc gcgcacattt ccccgaaaag tgccacctga\n", + " 3541 cgtctaagaa accattatta tcatgacatt aacctataaa aataggcgta tcacgaggcc\n", + " 3601 ctttcgtctc gcgcgtttcg gtgatgacgg tgaaaacctc tgacacatgc agctcccgga\n", + " 3661 gacggtcaca gcttgtctgt aagcggatgc cgggagcaga caagcccgtc agggcgcgtc\n", + " 3721 agcgggtgtt ggcgggtgtc ggggctggct taactatgcg gcatcagagc agattgtact\n", + " 3781 gagagtgcac catatggaca tattgtcgtt agaacgcggc tacaattaat acataacctt\n", + " 3841 atgtatcata cacatacgat ttaggtgaca ctatagaacg cggccgccag ctgaagcttc\n", + " 3901 gtacgctgca ggtcgacatg caaacagtaa tgatggatga cattcaaagc actgattcta\n", + " 3961 ttgctgaaaa agataatcac tctaataatg aatctaactt tacttggaaa gcgtttcgtg\n", + " 4021 aacaagtgga aaagcatttt tctaaaattg aaaggcttca ccaagtcctt ggaacagatg\n", + " 4081 gagacaattc atcattattt gagttgttta caacggcaat gaatgcccag cttcatgaaa\n", + " 4141 tggaacagtg ccagaaaaaa cttgaagatg actgtcagca aagaattgat tcaatcagat\n", + " 4201 ttttggtttc ctcattaaag ttaacggatg atacttctag tctcaaaatt gagtctcctt\n", + " 4261 taattcagtg tttgaatcgt ttgtcaatgg 
tagaaggaca atatatggca cagtatgatc\n", + " 4321 aaaagttaag tacgattaaa ggtatgtaat cgtctttaat ttagacttgt gttttaactg\n", + " 4381 atgtatagaa atgtatcaca aattggagtc atattgtaac cgcttaggaa gtccgttcgt\n", + " 4441 tttacctgat tttgagaatt catttttatc tgatgtatcc gatgctttta ctgaatcttt\n", + " 4501 gagaggacgc atcaacgaag ccgaaaagga gattgatgcg agattagagg ttattaattc\n", + " 4561 ctttgaagaa gaaattttgg gtttgtggtc tgaactcggt gttgagcccg ctgatgttcc\n", + " 4621 acaatacgaa caattgcttg aatcccatac taatcgacca aatgatgttt atgttactca\n", + " 4681 agaacttatc gaccaacttt gcaagcaaaa agaagttttt tccgctgaaa aagaaaagag\n", + " 4741 aagtgatcat ttaaaaagta tacaatcaga agttagcaac ttgtggaata agcttcaagt\n", + " 4801 ttctcccaat gaacaaagtc aatttggcga ttcatcaaac attaatcaag aaaatatttc\n", + " 4861 attatgggaa actgaacttg aaaaacttca tcagttaaaa aaggagcatt tacccatttt\n", + " 4921 tttagaagac tgtcgtcaac aaattcttca gctttgggat tctctgtttt attcagaaga\n", + " 4981 acaaagaaag tcctttacac ctatgtatga agacattatt acagagcagg ttcttacggc\n", + " 5041 ccatgaaaac tatataaagc aactagaggc cgaagtttct gctaataagt cctttttaag\n", + " 5101 cttaattaat cgctatgcct ctttaataga aggaaagaaa gagcttgaag ctagttctaa\n", + " 5161 tgatgcctct cgtctaacac aacggggacg ccgggaccca ggtttacttc tacgtgaaga\n", + " 5221 gaaaatccgt aagcgacttt ctagagaact tcctaaggtt cagtcgctgc ttataccaga\n", + " 5281 gattacagca tgggaagaaa gaaatggaag gacgttcctt ttttatgatg aaccacttct\n", + " 5341 caagatttgc caagaggcca ctcaaccaaa atcattatat agaagtgcaa gtgctgccgc\n", + " 5401 aaaccgcccg aaaacagcaa ctacaacgga ctctgttaat agaacacctt ctcaacgagg\n", + " 5461 gcgtgtagct gtaccttcaa caccaagtgt taggtccgct tctcgagcta tgacgagtcc\n", + " 5521 aaggacaccg cttcctagag taaaaaacac tcaaaatcca agtcgttcca ttagtgcaga\n", + " 5581 accgccatca gcaaccagta ccgccaatag aagacacccc actgctaatc gaattgatat\n", + " 5641 aaacgctaga ttaaacagtg ctagtcggtc tcgaagcgcg aacatgataa gacaaggggc\n", + " 5701 aaatggtagt gacagcaata tgtcttcttc acccgtttct ggaaattcca ataccccttt\n", + " 5761 taacaagttt ccaaattctg tatctcgcaa tacacatttt gaatccaagt 
caccgcaccc\n", + " 5821 aaattactct cgaactcctc atgaaacgta ttcaaaggct tcatctaaga acgtcccatt\n", + " 5881 aagtcctcca aagcagcgtg tagttaatga acacgcttta aatattatgt cggaaaaatt\n", + " 5941 gcaaagaact aatctgaaag aacaaacacc cgagatggac attgaaaaca gctcgcagaa\n", + " 6001 ccttcctttt tctcctatga agatatcccc cataagagca tcacccgtaa agacaattcc\n", + " 6061 atcatcaccg tcccccacta ccaacatttt ttctgctcca ctcaacaata ttacaaattg\n", + " 6121 tacaccgatg gaggatgaat ggggagaaga aggcttttaa tgg\n", + "//\n" + ] + } + ], + "source": [ + "# Ligating, then circularising the synthetic plasmid\n", + "\n", + "synthetic_vector = plasmid_digests[1] + gene_digests [1]\n", + "synthetic_vector = synthetic_vector.looped()\n", + "\n", + "# Printing out the completed cloning vector\n", + "\n", + "print(synthetic_vector.format(\"gb\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/Gibson.ipynb b/docs/notebooks/Gibson.ipynb index 8b9cdf2b..bdf9ec67 100644 --- a/docs/notebooks/Gibson.ipynb +++ b/docs/notebooks/Gibson.ipynb @@ -1,164 +1,164 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Gibson Assembly in pydna\n", - "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", - "\n", - "Gibson Assembly is a powerful method to assemble multiple DNA fragments into a single, continuous sequence in a seamless, one-step reaction. Developed by Daniel Gibson and colleagues in 2009, this method has been widely applied to work in molecular cloning, biotechnology, and synthetic biology. 
\n", - "\n", - "`pydna` provides the `Assembly` class to simulate the assembly of DNA sequences. Below is an example fpr performing Gibson Assembly with pre-existing DNA fragments, followed by primer design for generating these fragments via the `pcr` method, if needed.\n", - "\n", - "The `Assembly` takes the following arguments:\n", - " * `frags`: list of DNA fragments as `Dseqrecord` objects\n", - " * `limit`: the minimum sequence homology required.\n", - " * `algorithm`: the function used to find homology regions between DNA fragments. For Gibson Assembly, we use the `terminal_overlap` function, which finds homology regions only at the terminal regions. By default, the `Assembly` class uses the `common_sub_strings` function to find homology regions, which finds homology anywhere, as it could happen in a homologous recombination event.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Assembly\n", - "fragments..: 33bp 34bp 35bp\n", - "limit(bp)..: 14\n", - "G.nodes....: 6\n", - "algorithm..: terminal_overlap\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.assembly import Assembly\n", - "from pydna.common_sub_strings import terminal_overlap\n", - "\n", - "#Creating example Dseqrecord sequences\n", - "fragment1 = Dseqrecord(\"acgatgctatactgCCCCCtgtgctgtgctcta\")\n", - "fragment2 = Dseqrecord(\"tgtgctgtgctctaTTTTTtattctggctgtatc\")\n", - "fragment3 = Dseqrecord(\"tattctggctgtatcGGGGGtacgatgctatactg\")\n", - "\n", - "#Creating a list of sequences to 
assemble\n", - "fragments = [fragment1, fragment2, fragment3]\n", - "\n", - "#Performing Gibson assembly, with a minimum shared homology of 14bp\n", - "assembly = Assembly(fragments, limit=14, algorithm=terminal_overlap)\n", - "\n", - "#Displaying the assembled product\n", - "print(assembly)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The printed output shows the length of each fragment provided to the assembly, the minimum length required for sequence homology search, the number of nodes (number of overlapping regions), and the algorithm used for sequence homology search. Please refer to the full `Assembly` module documentation for more information on the algorithm applied.\n", - "\n", - "To make a circular sequence from an `Assembly`, pydna provides the `assemble_circular` method. The assembled sequence can be printed as normal, as `Dseqrecord` objects. Note that the `assemble_circular` method returns a list, where the two elements are reverse complement of each other." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: True\n", - "size: 59\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(o59)\n", - "acga..GGGt\n", - "tgct..CCCa\n", - "\n", - "Dseqrecord\n", - "circular: True\n", - "size: 59\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(o59)\n", - "taga..AAAA\n", - "atct..TTTT\n" - ] - } - ], - "source": [ - "from pydna.contig import Contig\n", - "\n", - "#Circularizing the assembled sequence\n", - "assembly_circ = assembly.assemble_circular()\n", - "\n", - "#Printing the sequence records\n", - "print(assembly_circ[0])\n", - "print()\n", - "print(assembly_circ[1])\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Please refer to the Example_Gibson page for an example of a completed workflow for modelling Gibson Assembly using pydna. " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Gibson Assembly in pydna\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "Gibson Assembly is a powerful method to assemble multiple DNA fragments into a single, continuous sequence in a seamless, one-step reaction. 
Developed by Daniel Gibson and colleagues in 2009, this method has been widely applied to work in molecular cloning, biotechnology, and synthetic biology. \n", + "\n", + "`pydna` provides the `Assembly` class to simulate the assembly of DNA sequences. Below is an example fpr performing Gibson Assembly with pre-existing DNA fragments, followed by primer design for generating these fragments via the `pcr` method, if needed.\n", + "\n", + "The `Assembly` takes the following arguments:\n", + " * `frags`: list of DNA fragments as `Dseqrecord` objects\n", + " * `limit`: the minimum sequence homology required.\n", + " * `algorithm`: the function used to find homology regions between DNA fragments. For Gibson Assembly, we use the `terminal_overlap` function, which finds homology regions only at the terminal regions. By default, the `Assembly` class uses the `common_sub_strings` function to find homology regions, which finds homology anywhere, as it could happen in a homologous recombination event.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Assembly\n", + "fragments..: 33bp 34bp 35bp\n", + "limit(bp)..: 14\n", + "G.nodes....: 6\n", + "algorithm..: terminal_overlap\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.assembly import Assembly\n", + "from pydna.common_sub_strings import terminal_overlap\n", + "\n", + "#Creating example Dseqrecord sequences\n", + "fragment1 = Dseqrecord(\"acgatgctatactgCCCCCtgtgctgtgctcta\")\n", + "fragment2 = 
Dseqrecord(\"tgtgctgtgctctaTTTTTtattctggctgtatc\")\n", + "fragment3 = Dseqrecord(\"tattctggctgtatcGGGGGtacgatgctatactg\")\n", + "\n", + "#Creating a list of sequences to assemble\n", + "fragments = [fragment1, fragment2, fragment3]\n", + "\n", + "#Performing Gibson assembly, with a minimum shared homology of 14bp\n", + "assembly = Assembly(fragments, limit=14, algorithm=terminal_overlap)\n", + "\n", + "#Displaying the assembled product\n", + "print(assembly)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The printed output shows the length of each fragment provided to the assembly, the minimum length required for sequence homology search, the number of nodes (number of overlapping regions), and the algorithm used for sequence homology search. Please refer to the full `Assembly` module documentation for more information on the algorithm applied.\n", + "\n", + "To make a circular sequence from an `Assembly`, pydna provides the `assemble_circular` method. The assembled sequence can be printed as normal, as `Dseqrecord` objects. Note that the `assemble_circular` method returns a list, where the two elements are reverse complement of each other." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: True\n", + "size: 59\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(o59)\n", + "acga..GGGt\n", + "tgct..CCCa\n", + "\n", + "Dseqrecord\n", + "circular: True\n", + "size: 59\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(o59)\n", + "taga..AAAA\n", + "atct..TTTT\n" + ] + } + ], + "source": [ + "from pydna.contig import Contig\n", + "\n", + "#Circularizing the assembled sequence\n", + "assembly_circ = assembly.assemble_circular()\n", + "\n", + "#Printing the sequence records\n", + "print(assembly_circ[0])\n", + "print()\n", + "print(assembly_circ[1])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please refer to the Example_Gibson page for an example of a completed workflow for modelling Gibson Assembly using pydna. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/Importing_Seqs.ipynb b/docs/notebooks/Importing_Seqs.ipynb index 15b366e7..8608c806 100755 --- a/docs/notebooks/Importing_Seqs.ipynb +++ b/docs/notebooks/Importing_Seqs.ipynb @@ -1,405 +1,405 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Importing and viewing sequence files in pydna\n", - "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", - "\n", - "pydna can be used to work with FASTA, Genbank, EMBL, and snapgene files (.fasta, .gb, .embl, .dna). You can read these files into a `Dseqrecord` that one can view and work with. You can also instantiate `Dseqrecord` objects with strings.\n", - "\n", - "## Importing Sequence Files\n", - "\n", - "To import files into pydna is simple. pydna provides the `parse` method to read all DNA sequences in a file into a list. As an input, `parse` can take:\n", - "\n", - "* The path to a file from your computer\n", - "* A python string with the file content.\n", - "\n", - "The following code shows an example of how to use the `parse` function to import a FASTA file." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">lcl|U49845.1_cds_AAA98665.1_1 [protein=TCP1-beta] [frame=3] [protein_id=AAA98665.1] [location=<1..206] [gbkey=CDS]\n", - "TCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCC\n", - "GACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCT\n", - "GCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGA\n", - "ACCGCCAATAGACAACATATGTAA\n" - ] - } - ], - "source": [ - "from pydna.parsers import parse\n", - "\n", - "#Import your file into python using its path\n", - "file_path = \"./U49845.fasta\"\n", - "files = parse(file_path)\n", - "\n", - "#Show your FASTA file in python\n", - "print(files[0].format(\"fasta\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that `parse` returns a `list` object, hence requiring `[0]` to take the first element of the list. When you have a FASTA file that contains multiple sequences, you can index the list accordingly (e.g `[0]`, `[1]`, ...)\n", - "\n", - "The last line of code uses the `format` method to generate a string representation of the sequence as a FASTA file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Another example, using a GenBank file ([U49845](https://www.ncbi.nlm.nih.gov/nucleotide/U49845)), is shown below." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LOCUS SCU49845 5028 bp DNA linear PLN 29-OCT-2018\n", - "DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p\n", - " (AXL2) and Rev7p (REV7) genes, complete cds.\n", - "ACCESSION U49845\n", - "VERSION U49845.1\n", - "KEYWORDS .\n", - "SOURCE Saccharomyces cerevisiae (brewer's yeast)\n", - " ORGANISM Saccharomyces cerevisiae\n", - " Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina;\n", - " Saccharomycetes; Saccharomycetales; Saccharomycetaceae;\n", - " Saccharomyces.\n", - "REFERENCE 1 (bases 1 to 5028)\n", - " AUTHORS Roemer,T., Madden,K., Chang,J. and Snyder,M.\n", - " TITLE Selection of axial growth sites in yeast requires Axl2p, a novel\n", - " plasma membrane glycoprotein\n", - " JOURNAL Genes Dev. 10 (7), 777-793 (1996)\n", - " PUBMED 8846915\n", - "REFERENCE 2 (bases 1 to 5028)\n", - " AUTHORS Roemer,T.\n", - " TITLE Direct Submission\n", - " JOURNAL Submitted (22-FEB-1996) Biology, Yale University, New Haven, CT\n", - " 06520, USA\n", - "FEATURES Location/Qualifiers\n", - " source 1..5028\n", - " /organism=\"Saccharomyces cerevisiae\"\n", - " /mol_type=\"genomic DNA\"\n", - " /db_xref=\"taxon:4932\"\n", - " /chromosome=\"IX\"\n", - " mRNA <1..>206\n", - " /product=\"TCP1-beta\"\n", - " CDS <1..206\n", - " /codon_start=3\n", - " /product=\"TCP1-beta\"\n", - " /protein_id=\"AAA98665.1\"\n", - " /translation=\"SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAA\n", - " EVLLRVDNIIRARPRTANRQHM\"\n", - " gene <687..>3158\n", - " /gene=\"AXL2\"\n", - " mRNA <687..>3158\n", - " /gene=\"AXL2\"\n", - " /product=\"Axl2p\"\n", - " CDS 687..3158\n", - " /gene=\"AXL2\"\n", - " /note=\"plasma membrane glycoprotein\"\n", - " /codon_start=1\n", - " /product=\"Axl2p\"\n", - " /protein_id=\"AAA98666.1\"\n", - " /translation=\"MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFT\n", - " 
FQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVI\n", - " LEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFN\n", - " VTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYS\n", - " FVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDD\n", - " PISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFN\n", - " FEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKF\n", - " QSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSS\n", - " HHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGV\n", - " ILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSY\n", - " DDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKP\n", - " PVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKL\n", - " FDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQ\n", - " SGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQV\n", - " KDIHGRIPEML\"\n", - " gene complement(<3300..>4037)\n", - " /gene=\"REV7\"\n", - " mRNA complement(<3300..>4037)\n", - " /gene=\"REV7\"\n", - " /product=\"Rev7p\"\n", - " CDS complement(3300..4037)\n", - " /gene=\"REV7\"\n", - " /codon_start=1\n", - " /product=\"Rev7p\"\n", - " /protein_id=\"AAA98667.1\"\n", - " /translation=\"MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQF\n", - " VPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKD\n", - " DQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVD\n", - " SLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISG\n", - " DDKILNGVYSQYEEGESIFGSLF\"\n", - "ORIGIN\n", - " 1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg\n", - " 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct\n", - " 121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa\n", - " 181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg\n", - " 241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa\n", - " 301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa\n", - " 361 attttggcaa 
cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat\n", - " 421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga\n", - " 481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc\n", - " 541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga\n", - " 601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta\n", - " 661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag\n", - " 721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa\n", - " 781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata\n", - " 841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga\n", - " 901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac\n", - " 961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg\n", - " 1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca tccatctcgc\n", - " 1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa\n", - " 1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca\n", - " 1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac\n", - " 1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa\n", - " 1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag\n", - " 1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct\n", - " 1441 ctattcaaaa tagtttgata atcaacgtta ctgacacagg taacgtttca tatgacttac\n", - " 1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa\n", - " 1561 acttattgga tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc\n", - " 1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata\n", - " 1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca\n", - " 1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc\n", - " 1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc\n", - " 1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac 
attagctgga gaagtgccca\n", - " 1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc\n", - " 1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg\n", - " 2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt\n", - " 2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc\n", - " 2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg\n", - " 2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca\n", - " 2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata\n", - " 2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg\n", - " 2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga\n", - " 2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt\n", - " 2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat\n", - " 2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt\n", - " 2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc\n", - " 2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag\n", - " 2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta\n", - " 2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa\n", - " 2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact\n", - " 2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt\n", - " 3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa\n", - " 3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag\n", - " 3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct\n", - " 3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt\n", - " 3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact\n", - " 3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa\n", - " 3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa 
aggccccacg\n", - " 3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt\n", - " 3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc\n", - " 3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca\n", - " 3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc\n", - " 3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc\n", - " 3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat\n", - " 3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa\n", - " 3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga\n", - " 3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat\n", - " 3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc\n", - " 4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc\n", - " 4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa\n", - " 4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg\n", - " 4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc\n", - " 4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt\n", - " 4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg\n", - " 4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg\n", - " 4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt\n", - " 4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt\n", - " 4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat\n", - " 4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc\n", - " 4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct\n", - " 4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta\n", - " 4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg actctctaac\n", - " 4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct\n", - " 4921 
ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct\n", - " 4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc\n", - "//" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.parsers import parse\n", - "\n", - "file_path = \"./U49845.gb\"\n", - "files = parse(file_path)\n", - "\n", - "# Convert the Dseqrecord object into a formatted string in GenBank format\n", - "files[0].format(\"gb\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, you can work with the sequence record using pydna, using the `Dseqrecord` class. `Dseqrecord` provides ways to highlight regions of interest on the sequence, adding new features to the record, removing features, and creating new `Dseqrecord` objects to store and export your changes. Please refer to the `Dseq_Features` notebook for more information." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Importing Sequences from Strings\n", - "\n", - "`parse` also allows sequences to be read from a string alone. This could be useful to read FASTA sequences obtained from GenBank APIs. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">lcl|U49845.1_cds_AAA98667.1_3 [gene=REV7] [protein=Rev7p] [protein_id=AAA98667.1] [location=complement(3300..4037)] [gbkey=CDS]\n", - "ATGAATAGATGGGTAGAGAAGTGGCTGAGGGTATACTTAAAATGCTACATTAATTTGATT\n", - "TTATTTTATAGAAATGTATACCCACCTCAGTCATTCGACTACACTACTTACCAGTCATTC\n", - "AACTTGCCGCAGTTCGTTCCCATTAATAGGCATCCTGCTTTAATTGACTATATAGAAGAA\n", - "CTTATACTGGATGTTCTTTCTAAATTAACGCACGTTTACAGATTTTCCATCTGCATTATT\n", - "AATAAAAAGAACGATTTATGCATTGAAAAATACGTTTTAGATTTTAGTGAATTACAACAT\n", - "GTGGATAAAGACGATCAGATCATTACGGAAACTGAAGTGTTCGACGAATTCCGATCTTCC\n", - "TTAAATAGTTTGATTATGCATTTGGAGAAATTACCTAAAGTCAACGATGACACAATAACA\n", - "TTTGAAGCAGTTATTAATGCGATCGAATTGGAACTAGGACATAAGTTGGACAGAAACAGG\n", - "AGGGTCGATAGTTTGGAGGAAAAAGCAGAAATTGAAAGGGATTCAAACTGGGTTAAATGT\n", - "CAAGAAGATGAAAATTTACCAGACAATAATGGTTTTCAACCTCCTAAAATAAAACTCACT\n", - "TCTTTAGTCGGTTCTGACGTGGGGCCTTTGATTATTCATCAGTTTAGTGAAAAATTAATC\n", - "AGCGGTGACGACAAAATTTTGAATGGAGTGTATTCTCAATATGAAGAGGGCGAGAGCATT\n", - "TTTGGATCTTTGTTTTAA\n" - ] - } - ], - "source": [ - "from pydna.parsers import parse\n", - "\n", - "my_record = parse(\n", - "'''\n", - ">lcl|U49845.1_cds_AAA98667.1_3 [gene=REV7] [protein=Rev7p] [protein_id=AAA98667.1] [location=complement(3300..4037)] [gbkey=CDS]\n", - "ATGAATAGATGGGTAGAGAAGTGGCTGAGGGTATACTTAAAATGCTACATTAATTTGATTTTATTTTATA\n", - "GAAATGTATACCCACCTCAGTCATTCGACTACACTACTTACCAGTCATTCAACTTGCCGCAGTTCGTTCC\n", - "CATTAATAGGCATCCTGCTTTAATTGACTATATAGAAGAACTTATACTGGATGTTCTTTCTAAATTAACG\n", - "CACGTTTACAGATTTTCCATCTGCATTATTAATAAAAAGAACGATTTATGCATTGAAAAATACGTTTTAG\n", - "ATTTTAGTGAATTACAACATGTGGATAAAGACGATCAGATCATTACGGAAACTGAAGTGTTCGACGAATT\n", - "CCGATCTTCCTTAAATAGTTTGATTATGCATTTGGAGAAATTACCTAAAGTCAACGATGACACAATAACA\n", - "TTTGAAGCAGTTATTAATGCGATCGAATTGGAACTAGGACATAAGTTGGACAGAAACAGGAGGGTCGATA\n", - 
"GTTTGGAGGAAAAAGCAGAAATTGAAAGGGATTCAAACTGGGTTAAATGTCAAGAAGATGAAAATTTACC\n", - "AGACAATAATGGTTTTCAACCTCCTAAAATAAAACTCACTTCTTTAGTCGGTTCTGACGTGGGGCCTTTG\n", - "ATTATTCATCAGTTTAGTGAAAAATTAATCAGCGGTGACGACAAAATTTTGAATGGAGTGTATTCTCAAT\n", - "ATGAAGAGGGCGAGAGCATTTTTGGATCTTTGTTTTAA\n", - "'''\n", - ")\n", - "print(my_record[0].format(\"fasta\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extra info\n", - "\n", - "Note that pydna's `parse` guesses whether the argument passed is a file path or a string, and also guesses the file type based on the content, so it can give unexpected behaviour if your files are not well formatted. To have more control over the parsing of sequences, you can use biopython's `parse` from `Bio.SeqIO`, and then instantiate a `Dseqrecord` from the biopython's `SeqRecord`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(-5028)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from Bio.SeqIO import parse as seqio_parse\n", - "from pydna.dseqrecord import Dseqrecord\n", - "\n", - "file_path = './U49845.gb'\n", - "\n", - "# Extract the first Seqrecord of the SeqIO.parse iterator\n", - "seq_record = next(seqio_parse(file_path, 'genbank'))\n", - "\n", - "# This is how circularity is stored in biopython's seqrecord\n", - "is_circular = 'topology' in seq_record.annotations.keys() and seq_record.annotations['topology'] == 'circular'\n", - "\n", - "# Convert into Dseqrecord\n", - "dseq_record = Dseqrecord(seq_record, circular=is_circular)\n", - "\n", - "dseq_record" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - 
"nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Importing and viewing sequence files in pydna\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "pydna can be used to work with FASTA, Genbank, EMBL, and snapgene files (.fasta, .gb, .embl, .dna). You can read these files into a `Dseqrecord` that one can view and work with. You can also instantiate `Dseqrecord` objects with strings.\n", + "\n", + "## Importing Sequence Files\n", + "\n", + "To import files into pydna is simple. pydna provides the `parse` method to read all DNA sequences in a file into a list. As an input, `parse` can take:\n", + "\n", + "* The path to a file from your computer\n", + "* A python string with the file content.\n", + "\n", + "The following code shows an example of how to use the `parse` function to import a FASTA file." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">lcl|U49845.1_cds_AAA98665.1_1 [protein=TCP1-beta] [frame=3] [protein_id=AAA98665.1] [location=<1..206] [gbkey=CDS]\n", + "TCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCC\n", + "GACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCT\n", + "GCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGA\n", + "ACCGCCAATAGACAACATATGTAA\n" + ] + } + ], + "source": [ + "from pydna.parsers import parse\n", + "\n", + "#Import your file into python using its path\n", + "file_path = \"./U49845.fasta\"\n", + "files = parse(file_path)\n", + "\n", + "#Show your FASTA file in python\n", + "print(files[0].format(\"fasta\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that `parse` returns a `list` object, hence requiring `[0]` to take the first element of the list. When you have a FASTA file that contains multiple sequences, you can index the list accordingly (e.g `[0]`, `[1]`, ...)\n", + "\n", + "The last line of code uses the `format` method to generate a string representation of the sequence as a FASTA file." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another example, using a GenBank file ([U49845](https://www.ncbi.nlm.nih.gov/nucleotide/U49845)), is shown below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LOCUS SCU49845 5028 bp DNA linear PLN 29-OCT-2018\n", + "DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p\n", + " (AXL2) and Rev7p (REV7) genes, complete cds.\n", + "ACCESSION U49845\n", + "VERSION U49845.1\n", + "KEYWORDS .\n", + "SOURCE Saccharomyces cerevisiae (brewer's yeast)\n", + " ORGANISM Saccharomyces cerevisiae\n", + " Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina;\n", + " Saccharomycetes; Saccharomycetales; Saccharomycetaceae;\n", + " Saccharomyces.\n", + "REFERENCE 1 (bases 1 to 5028)\n", + " AUTHORS Roemer,T., Madden,K., Chang,J. and Snyder,M.\n", + " TITLE Selection of axial growth sites in yeast requires Axl2p, a novel\n", + " plasma membrane glycoprotein\n", + " JOURNAL Genes Dev. 10 (7), 777-793 (1996)\n", + " PUBMED 8846915\n", + "REFERENCE 2 (bases 1 to 5028)\n", + " AUTHORS Roemer,T.\n", + " TITLE Direct Submission\n", + " JOURNAL Submitted (22-FEB-1996) Biology, Yale University, New Haven, CT\n", + " 06520, USA\n", + "FEATURES Location/Qualifiers\n", + " source 1..5028\n", + " /organism=\"Saccharomyces cerevisiae\"\n", + " /mol_type=\"genomic DNA\"\n", + " /db_xref=\"taxon:4932\"\n", + " /chromosome=\"IX\"\n", + " mRNA <1..>206\n", + " /product=\"TCP1-beta\"\n", + " CDS <1..206\n", + " /codon_start=3\n", + " /product=\"TCP1-beta\"\n", + " /protein_id=\"AAA98665.1\"\n", + " /translation=\"SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAA\n", + " EVLLRVDNIIRARPRTANRQHM\"\n", + " gene <687..>3158\n", + " /gene=\"AXL2\"\n", + " mRNA <687..>3158\n", + " /gene=\"AXL2\"\n", + " /product=\"Axl2p\"\n", + " CDS 687..3158\n", + " /gene=\"AXL2\"\n", + " /note=\"plasma membrane glycoprotein\"\n", + " /codon_start=1\n", + " /product=\"Axl2p\"\n", + " /protein_id=\"AAA98666.1\"\n", + " /translation=\"MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFT\n", + " 
FQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVI\n", + " LEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFN\n", + " VTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYS\n", + " FVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDD\n", + " PISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFN\n", + " FEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKF\n", + " QSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSS\n", + " HHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGV\n", + " ILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSY\n", + " DDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKP\n", + " PVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKL\n", + " FDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQ\n", + " SGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQV\n", + " KDIHGRIPEML\"\n", + " gene complement(<3300..>4037)\n", + " /gene=\"REV7\"\n", + " mRNA complement(<3300..>4037)\n", + " /gene=\"REV7\"\n", + " /product=\"Rev7p\"\n", + " CDS complement(3300..4037)\n", + " /gene=\"REV7\"\n", + " /codon_start=1\n", + " /product=\"Rev7p\"\n", + " /protein_id=\"AAA98667.1\"\n", + " /translation=\"MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQF\n", + " VPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKD\n", + " DQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVD\n", + " SLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISG\n", + " DDKILNGVYSQYEEGESIFGSLF\"\n", + "ORIGIN\n", + " 1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg\n", + " 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct\n", + " 121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa\n", + " 181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg\n", + " 241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa\n", + " 301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa\n", + " 361 attttggcaa 
cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat\n", + " 421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga\n", + " 481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc\n", + " 541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga\n", + " 601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta\n", + " 661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag\n", + " 721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa\n", + " 781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata\n", + " 841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga\n", + " 901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac\n", + " 961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg\n", + " 1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca tccatctcgc\n", + " 1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa\n", + " 1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca\n", + " 1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac\n", + " 1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa\n", + " 1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag\n", + " 1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct\n", + " 1441 ctattcaaaa tagtttgata atcaacgtta ctgacacagg taacgtttca tatgacttac\n", + " 1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa\n", + " 1561 acttattgga tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc\n", + " 1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata\n", + " 1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca\n", + " 1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc\n", + " 1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc\n", + " 1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac 
attagctgga gaagtgccca\n", + " 1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc\n", + " 1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg\n", + " 2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt\n", + " 2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc\n", + " 2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg\n", + " 2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca\n", + " 2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata\n", + " 2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg\n", + " 2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga\n", + " 2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt\n", + " 2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat\n", + " 2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt\n", + " 2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc\n", + " 2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag\n", + " 2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta\n", + " 2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa\n", + " 2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact\n", + " 2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt\n", + " 3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa\n", + " 3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag\n", + " 3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct\n", + " 3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt\n", + " 3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact\n", + " 3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa\n", + " 3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa 
aggccccacg\n", + " 3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt\n", + " 3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc\n", + " 3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca\n", + " 3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc\n", + " 3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc\n", + " 3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat\n", + " 3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa\n", + " 3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga\n", + " 3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat\n", + " 3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc\n", + " 4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc\n", + " 4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa\n", + " 4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg\n", + " 4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc\n", + " 4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt\n", + " 4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg\n", + " 4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg\n", + " 4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt\n", + " 4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt\n", + " 4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat\n", + " 4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc\n", + " 4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct\n", + " 4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta\n", + " 4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg actctctaac\n", + " 4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct\n", + " 4921 
ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct\n", + " 4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc\n", + "//" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.parsers import parse\n", + "\n", + "file_path = \"./U49845.gb\"\n", + "files = parse(file_path)\n", + "\n", + "# Convert the Dseqrecord object into a formatted string in GenBank format\n", + "files[0].format(\"gb\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, you can work with the sequence record using pydna, using the `Dseqrecord` class. `Dseqrecord` provides ways to highlight regions of interest on the sequence, adding new features to the record, removing features, and creating new `Dseqrecord` objects to store and export your changes. Please refer to the `Dseq_Features` notebook for more information." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing Sequences from Strings\n", + "\n", + "`parse` also allows sequences to be read from a string alone. This could be useful to read FASTA sequences obtained from GenBank APIs. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">lcl|U49845.1_cds_AAA98667.1_3 [gene=REV7] [protein=Rev7p] [protein_id=AAA98667.1] [location=complement(3300..4037)] [gbkey=CDS]\n", + "ATGAATAGATGGGTAGAGAAGTGGCTGAGGGTATACTTAAAATGCTACATTAATTTGATT\n", + "TTATTTTATAGAAATGTATACCCACCTCAGTCATTCGACTACACTACTTACCAGTCATTC\n", + "AACTTGCCGCAGTTCGTTCCCATTAATAGGCATCCTGCTTTAATTGACTATATAGAAGAA\n", + "CTTATACTGGATGTTCTTTCTAAATTAACGCACGTTTACAGATTTTCCATCTGCATTATT\n", + "AATAAAAAGAACGATTTATGCATTGAAAAATACGTTTTAGATTTTAGTGAATTACAACAT\n", + "GTGGATAAAGACGATCAGATCATTACGGAAACTGAAGTGTTCGACGAATTCCGATCTTCC\n", + "TTAAATAGTTTGATTATGCATTTGGAGAAATTACCTAAAGTCAACGATGACACAATAACA\n", + "TTTGAAGCAGTTATTAATGCGATCGAATTGGAACTAGGACATAAGTTGGACAGAAACAGG\n", + "AGGGTCGATAGTTTGGAGGAAAAAGCAGAAATTGAAAGGGATTCAAACTGGGTTAAATGT\n", + "CAAGAAGATGAAAATTTACCAGACAATAATGGTTTTCAACCTCCTAAAATAAAACTCACT\n", + "TCTTTAGTCGGTTCTGACGTGGGGCCTTTGATTATTCATCAGTTTAGTGAAAAATTAATC\n", + "AGCGGTGACGACAAAATTTTGAATGGAGTGTATTCTCAATATGAAGAGGGCGAGAGCATT\n", + "TTTGGATCTTTGTTTTAA\n" + ] + } + ], + "source": [ + "from pydna.parsers import parse\n", + "\n", + "my_record = parse(\n", + "'''\n", + ">lcl|U49845.1_cds_AAA98667.1_3 [gene=REV7] [protein=Rev7p] [protein_id=AAA98667.1] [location=complement(3300..4037)] [gbkey=CDS]\n", + "ATGAATAGATGGGTAGAGAAGTGGCTGAGGGTATACTTAAAATGCTACATTAATTTGATTTTATTTTATA\n", + "GAAATGTATACCCACCTCAGTCATTCGACTACACTACTTACCAGTCATTCAACTTGCCGCAGTTCGTTCC\n", + "CATTAATAGGCATCCTGCTTTAATTGACTATATAGAAGAACTTATACTGGATGTTCTTTCTAAATTAACG\n", + "CACGTTTACAGATTTTCCATCTGCATTATTAATAAAAAGAACGATTTATGCATTGAAAAATACGTTTTAG\n", + "ATTTTAGTGAATTACAACATGTGGATAAAGACGATCAGATCATTACGGAAACTGAAGTGTTCGACGAATT\n", + "CCGATCTTCCTTAAATAGTTTGATTATGCATTTGGAGAAATTACCTAAAGTCAACGATGACACAATAACA\n", + "TTTGAAGCAGTTATTAATGCGATCGAATTGGAACTAGGACATAAGTTGGACAGAAACAGGAGGGTCGATA\n", + 
"GTTTGGAGGAAAAAGCAGAAATTGAAAGGGATTCAAACTGGGTTAAATGTCAAGAAGATGAAAATTTACC\n", + "AGACAATAATGGTTTTCAACCTCCTAAAATAAAACTCACTTCTTTAGTCGGTTCTGACGTGGGGCCTTTG\n", + "ATTATTCATCAGTTTAGTGAAAAATTAATCAGCGGTGACGACAAAATTTTGAATGGAGTGTATTCTCAAT\n", + "ATGAAGAGGGCGAGAGCATTTTTGGATCTTTGTTTTAA\n", + "'''\n", + ")\n", + "print(my_record[0].format(\"fasta\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extra info\n", + "\n", + "Note that pydna's `parse` guesses whether the argument passed is a file path or a string, and also guesses the file type based on the content, so it can give unexpected behaviour if your files are not well formatted. To have more control over the parsing of sequences, you can use biopython's `parse` from `Bio.SeqIO`, and then instantiate a `Dseqrecord` from the biopython's `SeqRecord`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(-5028)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from Bio.SeqIO import parse as seqio_parse\n", + "from pydna.dseqrecord import Dseqrecord\n", + "\n", + "file_path = './U49845.gb'\n", + "\n", + "# Extract the first Seqrecord of the SeqIO.parse iterator\n", + "seq_record = next(seqio_parse(file_path, 'genbank'))\n", + "\n", + "# This is how circularity is stored in biopython's seqrecord\n", + "is_circular = 'topology' in seq_record.annotations.keys() and seq_record.annotations['topology'] == 'circular'\n", + "\n", + "# Convert into Dseqrecord\n", + "dseq_record = Dseqrecord(seq_record, circular=is_circular)\n", + "\n", + "dseq_record" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/PCR.ipynb b/docs/notebooks/PCR.ipynb index 4cc29260..d6cb534f 100755 --- a/docs/notebooks/PCR.ipynb +++ b/docs/notebooks/PCR.ipynb @@ -1,395 +1,395 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# How to Perform a Polymerase Chain Reaction (PCR)\n", - "\n", - "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", - "\n", - "This page walks you through how to perform a PCR, and design PCR primers, using `pydna`. A PCR amplifies a specific stretch of DNA defined by the primers, and it is critical to ensure primer binding specificity and appropriate primer melting temperature (tm) through careful design. `pydna` provides tools for quick modelling of PCR to check for correct PCR products, and methods for calculating tm and primer design, as performed in other popular web servcies (e.g Primer3Plus). \n", - "\n", - "## Modelling PCR with Known Primers\n", - "\n", - "To perform PCR, `pydna` provides the `anneal` class and the `pcr` method to quickly generate expected primer products, on a `Dseqrecord` object. The `pcr` method needs only the forward and reverse primers, and the sequence. The primers must be passed from the 5' to the 3' end, following biological convention. More information on `Dseqrecord` and importing DNA sequences can be found in the other guide pages. \n", - " \n", - "The following example uses a 300+ bp custom sample circular DNA, containing an example gene that we would like to clone. 18 bp forward and reverse primers have been provided. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LOCUS 45bp_PCR_prod 45 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_description_description.\n", - "ACCESSION 45bp\n", - "VERSION 45bp\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 1..45\n", - " /label=\"example_gene\"\n", - " primer_bind 1..18\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer sequence:ATTCCTGCAGAGTACATC\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(28..45)\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCTT\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 attcctgcag agtacatcaa ttctatgaag atatcttcgg atggt\n", - "//" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.amplify import pcr\n", - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.parsers import parse\n", - "\n", - "#Importing GenBank file containing sample sequence \n", - "path = \"./sample_seq.gb\"\n", - "record = parse(path)[0]\n", - "\n", - "#Defining forward and reverse primers\n", - "fwd_primer = \"ATTCCTGCAGAGTACATC\"\n", - "rvs_primer = \"ACCATCCGAAGATATCTT\"\n", - "\n", - "#Performing PCR\n", - "pcr_product = pcr(fwd_primer, rvs_primer, record)\n", - "\n", - "#Printing results\n", - "pcr_product.format(\"gb\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "The `pcr` method then returns a `Amplicon` class object (to model a PCR product), a subclass of `Dseqrecord` with some extra methods (e.g `.figure`. See \"Other ways of visualising the PCR products\" section below). All the features inside the PCR product has been retained. Note how the example gene has been retained as a feature in `pcr_product`. In addition, two new features have been added to the record to indicate the forward and reverse primer binding regions.\n", - "\n", - "`pydna` also allows modelling for PCR with extra bases on the 5' end of primers. This functionality is useful for modelling molecular cloning with multiple steps, where you might want to add different restriction sites to PCR products and ensure that the right sequences have been replicated.\n", - "\n", - "For instance, to make sure that I can add a HindIII restriction site (AAGCTT) at the end of my `example_gene` without accidental hybridisation with other parts of the circular sequence, I can perform PCR in the `pydna` package like so." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LOCUS 57bp_PCR_prod 57 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_description_description.\n", - "ACCESSION 57bp\n", - "VERSION 57bp\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " primer_bind 1..21\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer sequence:AAGCTTATTCCTGCAGAGTACATC\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " gene 4..48\n", - " /label=\"example_gene\"\n", - " primer_bind complement(31..48)\n", - " /label=\"name\"\n", - " /PCR_conditions=\"primer sequence:AAGCTTACCATCCGAAGATATCTT\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 aagcttattc ctgcagagta catcaattct atgaagatat cttcggatgg taagctt\n", - "//" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fwd_primer = \"AAGCTTATTCCTGCAGAGTACATC\"\n", - "rvs_primer = \"AAGCTTACCATCCGAAGATATCTT\"\n", - "\n", - "#Performing PCR\n", - "pcr_product_HindIII = pcr(fwd_primer, rvs_primer, record)\n", - "\n", - "#Printing results\n", - "pcr_product_HindIII.format(\"gb\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For more information on restriction digests and ligation, please refer to the Restriction and Ligation page. 
\n", - "\n", - "## Other ways of visualising the PCR products \n", - "\n", - "In addition to the normal `print` function and the `.format()` method (More information can be found in Dseq and Importing_Seqs pages, respectively), pcr products can also be visualized in other ways.\n", - "\n", - "We can check the sequence of the pcr products alone using the `.seq` attribute on a PCR product:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ATTCCTGCAGAGTACATCAATTCTATGAAGATATCTTCGGATGGT\n" - ] - } - ], - "source": [ - "print(pcr_product.seq)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also visualize the pcr products as a figure, using the `.figure` method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5ATTCCTGCAGAGTACATC...AAGATATCTTCGGATGGT3\n", - " ||||||||||||||||||\n", - " 3TTCTATAGAAGCCTACCA5\n", - "5ATTCCTGCAGAGTACATC3\n", - " ||||||||||||||||||\n", - "3TAAGGACGTCTCATGTAG...TTCTATAGAAGCCTACCA5\n" - ] - } - ], - "source": [ - "print(pcr_product.figure())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Designing Primers and Calculating Tm in pydna\n", - "\n", - "`pydna` also provides the `primer_design` method to design primer sequences based on the desired pcr product and the template sequence's melting temperature (Tm). The `primer_design` method can be imported from the `pydna.design` module, and needs the user to supply the PCR template sequence (as a `Dseqrecord` object) and the Tm. The template sequence should be given as the first parameter, and the Tm give through the `target_tm=` argument, as demonstrated below. 
If you have no specific Tm in mind, the method uses the default Tm of 55 degrees celcius.\n", - "\n", - "Note that in the following example below, I used zero-based indexing on the `Dseqrecord` to find the sequence of my example gene, of which I would like to clone via PCR. Please refer to the `Dseq` page for more information on how to index a sequence. \n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS example_gene 45 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_f45 example_gene_r45 example_gene.\n", - "ACCESSION example_gene\n", - "VERSION example_gene\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 1..45\n", - " /label=\"example_gene\"\n", - " primer_bind 1..16\n", - " /label=\"f45\"\n", - " /PCR_conditions=\"primer sequence:ATTCCTGCAGAGTACA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(29..45)\n", - " /label=\"r45\"\n", - " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCT\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 attcctgcag agtacatcaa ttctatgaag atatcttcgg atggt\n", - "//\n" - ] - } - ], - "source": [ - "from pydna.design import primer_design\n", - "\n", - "#Designing the primers\n", - "primers = primer_design(record[6:51], target_tm=50.0)\n", - "\n", - "#Printing the output\n", - "print(primers.format(\"gb\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The formula for primer design in `pydna` is based on the Tm formula from Rychlik et al (1990), found [here](http://www.ncbi.nlm.nih.gov/pubmed/2243783). Additional information on calculating Tm can be found in the \"Calculating Tm\" section below.\n", - "\n", - "The forward and reverse primer sequences are printed in the features list of the `Amplicon` object. 
Note how the feature representing the example gene is retained, as appropriate. \n", - "\n", - "If you already have a forward / reverse primer, `primer_design` also allows this information to be taken as arguments. `fp` specifies the forward primer, `rp` specifies the reverse primers. `fp` and `rp` can be should be given as `Primer` class objects, which should be imported from `pydna` too. \n", - "\n", - "For instance, if I already have a forward primer containing an EcoRI restriction site, and I aim to to generate a reverse primer of a similar Tm, I can apply the following code: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS example_gene 51 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION pcr_product_forward_primer example_gene_r45 example_gene.\n", - "ACCESSION example_gene\n", - "VERSION example_gene\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " gene 1..45\n", - " /label=\"example_gene\"\n", - " primer_bind 1..19\n", - " /label=\"f45\"\n", - " /PCR_conditions=\"primer sequence:GAATTCATTCCTGCAGAGTACATCA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - " primer_bind complement(26..45)\n", - " /label=\"r45\"\n", - " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCTTCA\"\n", - " /ApEinfo_fwdcolor=\"#baffa3\"\n", - " /ApEinfo_revcolor=\"#ffbaba\"\n", - "ORIGIN\n", - " 1 gaattcattc ctgcagagta catcaattct atgaagatat cttcggatgg t\n", - "//\n" - ] - } - ], - "source": [ - "from pydna.primer import Primer\n", - "\n", - "forward_primer = Primer(\"GAATTCATTCCTGCAGAGTACATCA\", id=\"forward_primer\")\n", - "\n", - "primers_sixfive = primer_design(record[6:51], fp = forward_primer)\n", - "\n", - "print(primers_sixfive.format(\"gb\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Calculating Tm\n", - "\n", - "`pydna` comes with 
some functions to calculate Tms. The default function `tm_default` used is the previously mentioned one by Rychlik et al (1990), which takes a string as input. Another function derive from the Tm calculation adapted for primers using polymerases with a DNA binding domain (e.g Phusion polymerase). The default values for Tm calculation, including primer concentration, buffer strengths, and more, can also be modified through arguments in the `tm_default` method. Please refer to the `pydna.tm` module docstring for more information. An example is provided with a pair of primers; the temperature is given in degrees celcius." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "55.901005046706075\n", - "55.841913263215304\n" - ] - } - ], - "source": [ - "from pydna.tm import tm_default\n", - "\n", - "# Example Tm calculation for a pair of primers\n", - "primer_f = \"ATTCCTGCAGAGTACATCA\"\n", - "primer_r = \"ACCATCCGAAGATATCTTCA\"\n", - "tm_f = tm_default(primer_f)\n", - "tm_r = tm_default(primer_r)\n", - "\n", - "print(tm_f)\n", - "print(tm_r)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to Perform a Polymerase Chain Reaction (PCR)\n", + "\n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "This page walks you through how to perform a PCR, and design PCR primers, using `pydna`. 
A PCR amplifies a specific stretch of DNA defined by the primers, and it is critical to ensure primer binding specificity and appropriate primer melting temperature (tm) through careful design. `pydna` provides tools for quick modelling of PCR to check for correct PCR products, and methods for calculating tm and primer design, as performed in other popular web services (e.g. Primer3Plus). \n", + "\n", + "## Modelling PCR with Known Primers\n", + "\n", + "To perform PCR, `pydna` provides the `anneal` class and the `pcr` method to quickly generate expected primer products, on a `Dseqrecord` object. The `pcr` method needs only the forward and reverse primers, and the sequence. The primers must be passed from the 5' to the 3' end, following biological convention. More information on `Dseqrecord` and importing DNA sequences can be found in the other guide pages. \n", + " \n", + "The following example uses a 300+ bp custom sample circular DNA, containing an example gene that we would like to clone. 18 bp forward and reverse primers have been provided. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LOCUS 45bp_PCR_prod 45 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_description_description.\n", + "ACCESSION 45bp\n", + "VERSION 45bp\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 1..45\n", + " /label=\"example_gene\"\n", + " primer_bind 1..18\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer sequence:ATTCCTGCAGAGTACATC\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(28..45)\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCTT\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 attcctgcag agtacatcaa ttctatgaag atatcttcgg atggt\n", + "//" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.amplify import pcr\n", + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.parsers import parse\n", + "\n", + "#Importing GenBank file containing sample sequence \n", + "path = \"./sample_seq.gb\"\n", + "record = parse(path)[0]\n", + "\n", + "#Defining forward and reverse primers\n", + "fwd_primer = \"ATTCCTGCAGAGTACATC\"\n", + "rvs_primer = \"ACCATCCGAAGATATCTT\"\n", + "\n", + "#Performing PCR\n", + "pcr_product = pcr(fwd_primer, rvs_primer, record)\n", + "\n", + "#Printing results\n", + "pcr_product.format(\"gb\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "The `pcr` method then returns a `Amplicon` class object (to model a PCR product), a subclass of `Dseqrecord` with some extra methods (e.g `.figure`. See \"Other ways of visualising the PCR products\" section below). All the features inside the PCR product has been retained. Note how the example gene has been retained as a feature in `pcr_product`. In addition, two new features have been added to the record to indicate the forward and reverse primer binding regions.\n", + "\n", + "`pydna` also allows modelling for PCR with extra bases on the 5' end of primers. This functionality is useful for modelling molecular cloning with multiple steps, where you might want to add different restriction sites to PCR products and ensure that the right sequences have been replicated.\n", + "\n", + "For instance, to make sure that I can add a HindIII restriction site (AAGCTT) at the end of my `example_gene` without accidental hybridisation with other parts of the circular sequence, I can perform PCR in the `pydna` package like so." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LOCUS 57bp_PCR_prod 57 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_description_description.\n", + "ACCESSION 57bp\n", + "VERSION 57bp\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " primer_bind 1..21\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer sequence:AAGCTTATTCCTGCAGAGTACATC\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " gene 4..48\n", + " /label=\"example_gene\"\n", + " primer_bind complement(31..48)\n", + " /label=\"name\"\n", + " /PCR_conditions=\"primer sequence:AAGCTTACCATCCGAAGATATCTT\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 aagcttattc ctgcagagta catcaattct atgaagatat cttcggatgg taagctt\n", + "//" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fwd_primer = \"AAGCTTATTCCTGCAGAGTACATC\"\n", + "rvs_primer = \"AAGCTTACCATCCGAAGATATCTT\"\n", + "\n", + "#Performing PCR\n", + "pcr_product_HindIII = pcr(fwd_primer, rvs_primer, record)\n", + "\n", + "#Printing results\n", + "pcr_product_HindIII.format(\"gb\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more information on restriction digests and ligation, please refer to the Restriction and Ligation page. 
\n", + "\n", + "## Other ways of visualising the PCR products \n", + "\n", + "In addition to the normal `print` function and the `.format()` method (More information can be found in Dseq and Importing_Seqs pages, respectively), pcr products can also be visualized in other ways.\n", + "\n", + "We can check the sequence of the pcr products alone using the `.seq` attribute on a PCR product:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ATTCCTGCAGAGTACATCAATTCTATGAAGATATCTTCGGATGGT\n" + ] + } + ], + "source": [ + "print(pcr_product.seq)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also visualize the pcr products as a figure, using the `.figure` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5ATTCCTGCAGAGTACATC...AAGATATCTTCGGATGGT3\n", + " ||||||||||||||||||\n", + " 3TTCTATAGAAGCCTACCA5\n", + "5ATTCCTGCAGAGTACATC3\n", + " ||||||||||||||||||\n", + "3TAAGGACGTCTCATGTAG...TTCTATAGAAGCCTACCA5\n" + ] + } + ], + "source": [ + "print(pcr_product.figure())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Designing Primers and Calculating Tm in pydna\n", + "\n", + "`pydna` also provides the `primer_design` method to design primer sequences based on the desired pcr product and the template sequence's melting temperature (Tm). The `primer_design` method can be imported from the `pydna.design` module, and needs the user to supply the PCR template sequence (as a `Dseqrecord` object) and the Tm. The template sequence should be given as the first parameter, and the Tm give through the `target_tm=` argument, as demonstrated below. 
If you have no specific Tm in mind, the method uses the default Tm of 55 degrees Celsius.\n", + "\n", + "Note that in the example below, I used zero-based indexing on the `Dseqrecord` to find the sequence of my example gene, which I would like to clone via PCR. Please refer to the `Dseq` page for more information on how to index a sequence. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS example_gene 45 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_f45 example_gene_r45 example_gene.\n", + "ACCESSION example_gene\n", + "VERSION example_gene\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 1..45\n", + " /label=\"example_gene\"\n", + " primer_bind 1..16\n", + " /label=\"f45\"\n", + " /PCR_conditions=\"primer sequence:ATTCCTGCAGAGTACA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(29..45)\n", + " /label=\"r45\"\n", + " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCT\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 attcctgcag agtacatcaa ttctatgaag atatcttcgg atggt\n", + "//\n" + ] + } + ], + "source": [ + "from pydna.design import primer_design\n", + "\n", + "#Designing the primers\n", + "primers = primer_design(record[6:51], target_tm=50.0)\n", + "\n", + "#Printing the output\n", + "print(primers.format(\"gb\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The formula for primer design in `pydna` is based on the Tm formula from Rychlik et al (1990), found [here](http://www.ncbi.nlm.nih.gov/pubmed/2243783). Additional information on calculating Tm can be found in the \"Calculating Tm\" section below.\n", + "\n", + "The forward and reverse primer sequences are printed in the features list of the `Amplicon` object. 
Note how the feature representing the example gene is retained, as appropriate. \n", + "\n", + "If you already have a forward / reverse primer, `primer_design` also allows this information to be taken as arguments. `fp` specifies the forward primer, `rp` specifies the reverse primer. `fp` and `rp` should be given as `Primer` class objects, which should be imported from `pydna.primer`. \n", + "\n", + "For instance, if I already have a forward primer containing an EcoRI restriction site, and I aim to generate a reverse primer of a similar Tm, I can apply the following code: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS example_gene 51 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION pcr_product_forward_primer example_gene_r45 example_gene.\n", + "ACCESSION example_gene\n", + "VERSION example_gene\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " gene 1..45\n", + " /label=\"example_gene\"\n", + " primer_bind 1..19\n", + " /label=\"f45\"\n", + " /PCR_conditions=\"primer sequence:GAATTCATTCCTGCAGAGTACATCA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + " primer_bind complement(26..45)\n", + " /label=\"r45\"\n", + " /PCR_conditions=\"primer sequence:ACCATCCGAAGATATCTTCA\"\n", + " /ApEinfo_fwdcolor=\"#baffa3\"\n", + " /ApEinfo_revcolor=\"#ffbaba\"\n", + "ORIGIN\n", + " 1 gaattcattc ctgcagagta catcaattct atgaagatat cttcggatgg t\n", + "//\n" + ] + } + ], + "source": [ + "from pydna.primer import Primer\n", + "\n", + "forward_primer = Primer(\"GAATTCATTCCTGCAGAGTACATCA\", id=\"forward_primer\")\n", + "\n", + "primers_sixfive = primer_design(record[6:51], fp = forward_primer)\n", + "\n", + "print(primers_sixfive.format(\"gb\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calculating Tm\n", + "\n", + "`pydna` comes with 
some functions to calculate Tms. The default function `tm_default` used is the previously mentioned one by Rychlik et al (1990), which takes a string as input. Another function is derived from the Tm calculation adapted for primers using polymerases with a DNA binding domain (e.g Phusion polymerase). The default values for Tm calculation, including primer concentration, buffer strengths, and more, can also be modified through arguments in the `tm_default` method. Please refer to the `pydna.tm` module docstring for more information. An example is provided with a pair of primers; the temperature is given in degrees Celsius." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "55.901005046706075\n", + "55.841913263215304\n" + ] + } + ], + "source": [ + "from pydna.tm import tm_default\n", + "\n", + "# Example Tm calculation for a pair of primers\n", + "primer_f = \"ATTCCTGCAGAGTACATCA\"\n", + "primer_r = \"ACCATCCGAAGATATCTTCA\"\n", + "tm_f = tm_default(primer_f)\n", + "tm_r = tm_default(primer_r)\n", + "\n", + "print(tm_f)\n", + "print(tm_r)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/Restrict_Ligate_Cloning.ipynb b/docs/notebooks/Restrict_Ligate_Cloning.ipynb index 3b1ed426..60ba5219 100644 --- a/docs/notebooks/Restrict_Ligate_Cloning.ipynb +++ b/docs/notebooks/Restrict_Ligate_Cloning.ipynb @@ -1,319 +1,319 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Restriction and Ligation \n", - "> Visit the full library 
documentation [here](https://pydna-group.github.io/pydna/)\n", - "\n", - "In this page, we explore how to use pydna to cut, ligate, circularise DNA sequences. pydna works in conjugation with the `Bio.Restriction` module to apply a vast variety of restriction enzymes for cutting, whose module documentations can be found [here](https://biopython.org/DIST/docs/cookbook/Restriction.html).\n", - "\n", - "## Cutting with one or more restriction enzymes\n", - "\n", - "Restriction enzymes recognise specific DNA sequences and cut them, leaving sticky ends or blunt ends. To cut a sequence using `pydna`, we can use the `cut` method on a `Dseqrecord` object. Here is an example showing how to use the `cut` method to genenrate EcoRI restriction digests. The record includes a 338bp circular sequence, with an example gene feature." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: False\n", - "size: 338\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 1\n", - "/molecule_type=DNA\n", - "Dseq(-338)\n", - "AATTCTTC..TGTG \n", - " GAAG..ACACTTAA\n" - ] - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "from pydna.parsers import parse\n", - "from Bio.Restriction import EcoRI\n", - "\n", - "# Create a Dseqrecord with your FASTA/GenBank file\n", - "path = \"./sample_seq.gb\"\n", - "record = parse(path)[0]\n", - "\n", - "# Cut with a single enzyme\n", - "cut_records = record.cut(EcoRI)\n", - "\n", - "# Display the resulting 
fragments\n", - "for frag in cut_records:\n", - " print(frag)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The circular `Dseqrecord` is cut into a linear `Dseqrecord` object, since there is only one EcoRI recognition site. `Dseqrecord` also shows the 5' sticky end after cutting.\n", - "\n", - "The sequence can also be cut with multiple restriction enzymes, into multiple linear DNA sequences. We can simply import all the restriction enzymes, and use the cut method as normal." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Dseqrecord\n", - "circular: False\n", - "size: 51\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-51)\n", - "ATCT..TGTG \n", - "TAGA..ACACTTAA\n", - "\n", - "\n", - "Dseqrecord\n", - "circular: False\n", - "size: 214\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-214)\n", - "AATTCTTC..TGAT\n", - " GAAG..ACTA\n", - "\n", - "\n", - "Dseqrecord\n", - "circular: False\n", - "size: 73\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-73)\n", - "ATCT..AGAT\n", - "TAGA..TCTA\n", - "\n" - ] - } - ], - "source": [ - "from Bio.Restriction import EcoRV\n", - "\n", - "# Cut with a multiple enzymes\n", - "multi_cut_records = record.cut(EcoRI, EcoRV)\n", - "\n", - "# Display the resulting fragments\n", - "for frag in multi_cut_records:\n", - " print()\n", - " print(frag)\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There are two EcoRV recognition sites in `sample_seq`, and coupled with the one EcoRI recognition site, three DNA fragments are returned. Note how `Dseqrecord` returns the blunt end after EcoRV cuts. 
\n", - "\n", - "You can model any, and and number of, enzymes with the `cut` method and `Bio.Restriction` module. This makes `pydna` a quick and powerful method to plan your molecular cloning experiments, for instance to check the restriction digests of a 10kb plasmid with multiple enzymes. `cut` is also a method of the `Dseq` class, so `Dseq`s can be used as well. \n", - "\n", - "## Ligating fragments\n", - "\n", - "After cutting a DNA sequence, you can ligate the fragments back together in `pydna` using the `+` operator on `Dseqrecord` or `Dseq` objects. Ligation can occur via complementary sticky ends or blunt ends. For instance, we can select the first and second fragments from `multi_cut_records` via indexing, and then ligate sticky ends produced by EcoRI to make a single linear sequence." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: False\n", - "size: 261\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-261)\n", - "ATCT..TGAT\n", - "TAGA..ACTA\n" - ] - } - ], - "source": [ - "ligated_product = multi_cut_records[0] + multi_cut_records[1]\n", - "print(ligated_product)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also join blunt ends in a similar way. Note that the sticky-ends must be a perfect match to join. If `+` ligation (or any other method, really) doesn't work, make sure that:\n", - "\n", - "1. you are indeed performing the operation on `Dseqrecord` objects, as opposed to other data types (e.g lists, strings, etc)\n", - "2. `Dseqrecord` and the correct enzyme name (with correct roman numeral spelling) has been imported. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Circularizing fragments\n", - "\n", - "To circularize a cut DNA sequence use the `looped` method, which returns a new sequence object.\n", - "\n", - "🚨🚨 **VERY IMPORTANT** 🚨🚨 `.looped()` method does not act in place, so a new variable should be created to store the new circularised sequence, as shown in the following example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "is ligated_product circular? False\n", - "is circular_record circular? True\n", - "\n", - "Dseqrecord\n", - "circular: True\n", - "size: 261\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(o261)\n", - "ATCT..TGAT\n", - "TAGA..ACTA\n" - ] - } - ], - "source": [ - "circular_record = ligated_product.looped()\n", - "\n", - "print('is ligated_product circular?', ligated_product.circular)\n", - "print('is circular_record circular?', circular_record.circular)\n", - "print()\n", - "\n", - "print(circular_record)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extra Notes: What happens to features when cutting/ligating?\n", - "\n", - "A feature is removed from a `Dseqrecord` if the features is truncated by the cut. For instance, the example_gene feature is removed from the record after cutting `record` with PstI, which has recognition site within example_gene. within the cutand if the feature is completely within the cut, it is retained. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord\n", - "circular: False\n", - "size: 222\n", - "ID: id\n", - "Name: name\n", - "Description: description\n", - "Number of features: 0\n", - "/molecule_type=DNA\n", - "Dseq(-222)\n", - " GAGT..TAACTGCA\n", - "ACGTCTCA..ATTG \n" - ] - } - ], - "source": [ - "from Bio.Restriction import PstI\n", - "\n", - "cut_record2 = record.cut(PstI)\n", - "\n", - "print(cut_record2[0])\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "However, if a cut does not overlap with the feature, the feature is retained on the `Dseqrecord`. For instance, if we go back to the first example given by the EcoRI cut, example_gene has been retained after cutting. For more information on Features, please refer to the `Dseq_Feature` documentations." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Restriction and Ligation \n", + "> Visit the full library documentation [here](https://pydna-group.github.io/pydna/)\n", + "\n", + "In this page, we explore how to use pydna to cut, ligate, circularise DNA sequences. 
pydna works in conjugation with the `Bio.Restriction` module to apply a vast variety of restriction enzymes for cutting, whose module documentations can be found [here](https://biopython.org/DIST/docs/cookbook/Restriction.html).\n", + "\n", + "## Cutting with one or more restriction enzymes\n", + "\n", + "Restriction enzymes recognise specific DNA sequences and cut them, leaving sticky ends or blunt ends. To cut a sequence using `pydna`, we can use the `cut` method on a `Dseqrecord` object. Here is an example showing how to use the `cut` method to genenrate EcoRI restriction digests. The record includes a 338bp circular sequence, with an example gene feature." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: False\n", + "size: 338\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 1\n", + "/molecule_type=DNA\n", + "Dseq(-338)\n", + "AATTCTTC..TGTG \n", + " GAAG..ACACTTAA\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.parsers import parse\n", + "from Bio.Restriction import EcoRI\n", + "\n", + "# Create a Dseqrecord with your FASTA/GenBank file\n", + "path = \"./sample_seq.gb\"\n", + "record = parse(path)[0]\n", + "\n", + "# Cut with a single enzyme\n", + "cut_records = record.cut(EcoRI)\n", + "\n", + "# Display the resulting fragments\n", + "for frag in cut_records:\n", + " print(frag)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The circular `Dseqrecord` 
is cut into a linear `Dseqrecord` object, since there is only one EcoRI recognition site. `Dseqrecord` also shows the 5' sticky end after cutting.\n", + "\n", + "The sequence can also be cut with multiple restriction enzymes, into multiple linear DNA sequences. We can simply import all the restriction enzymes, and use the cut method as normal." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dseqrecord\n", + "circular: False\n", + "size: 51\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-51)\n", + "ATCT..TGTG \n", + "TAGA..ACACTTAA\n", + "\n", + "\n", + "Dseqrecord\n", + "circular: False\n", + "size: 214\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-214)\n", + "AATTCTTC..TGAT\n", + " GAAG..ACTA\n", + "\n", + "\n", + "Dseqrecord\n", + "circular: False\n", + "size: 73\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-73)\n", + "ATCT..AGAT\n", + "TAGA..TCTA\n", + "\n" + ] + } + ], + "source": [ + "from Bio.Restriction import EcoRV\n", + "\n", + "# Cut with a multiple enzymes\n", + "multi_cut_records = record.cut(EcoRI, EcoRV)\n", + "\n", + "# Display the resulting fragments\n", + "for frag in multi_cut_records:\n", + " print()\n", + " print(frag)\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are two EcoRV recognition sites in `sample_seq`, and coupled with the one EcoRI recognition site, three DNA fragments are returned. Note how `Dseqrecord` returns the blunt end after EcoRV cuts. \n", + "\n", + "You can model any, and and number of, enzymes with the `cut` method and `Bio.Restriction` module. 
This makes `pydna` a quick and powerful method to plan your molecular cloning experiments, for instance to check the restriction digests of a 10kb plasmid with multiple enzymes. `cut` is also a method of the `Dseq` class, so `Dseq`s can be used as well. \n", + "\n", + "## Ligating fragments\n", + "\n", + "After cutting a DNA sequence, you can ligate the fragments back together in `pydna` using the `+` operator on `Dseqrecord` or `Dseq` objects. Ligation can occur via complementary sticky ends or blunt ends. For instance, we can select the first and second fragments from `multi_cut_records` via indexing, and then ligate sticky ends produced by EcoRI to make a single linear sequence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: False\n", + "size: 261\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-261)\n", + "ATCT..TGAT\n", + "TAGA..ACTA\n" + ] + } + ], + "source": [ + "ligated_product = multi_cut_records[0] + multi_cut_records[1]\n", + "print(ligated_product)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also join blunt ends in a similar way. Note that the sticky-ends must be a perfect match to join. If `+` ligation (or any other method, really) doesn't work, make sure that:\n", + "\n", + "1. you are indeed performing the operation on `Dseqrecord` objects, as opposed to other data types (e.g lists, strings, etc)\n", + "2. `Dseqrecord` and the correct enzyme name (with correct roman numeral spelling) have been imported. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Circularizing fragments\n", + "\n", + "To circularize a cut DNA sequence use the `looped` method, which returns a new sequence object.\n", + "\n", + "🚨🚨 **VERY IMPORTANT** 🚨🚨 `.looped()` method does not act in place, so a new variable should be created to store the new circularised sequence, as shown in the following example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "is ligated_product circular? False\n", + "is circular_record circular? True\n", + "\n", + "Dseqrecord\n", + "circular: True\n", + "size: 261\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(o261)\n", + "ATCT..TGAT\n", + "TAGA..ACTA\n" + ] + } + ], + "source": [ + "circular_record = ligated_product.looped()\n", + "\n", + "print('is ligated_product circular?', ligated_product.circular)\n", + "print('is circular_record circular?', circular_record.circular)\n", + "print()\n", + "\n", + "print(circular_record)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extra Notes: What happens to features when cutting/ligating?\n", + "\n", + "A feature is removed from a `Dseqrecord` if the feature is truncated by the cut. For instance, the example_gene feature is removed from the record after cutting `record` with PstI, which has a recognition site within example_gene. Conversely, if the feature is completely within one of the resulting fragments, it is retained. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: False\n", + "size: 222\n", + "ID: id\n", + "Name: name\n", + "Description: description\n", + "Number of features: 0\n", + "/molecule_type=DNA\n", + "Dseq(-222)\n", + " GAGT..TAACTGCA\n", + "ACGTCTCA..ATTG \n" + ] + } + ], + "source": [ + "from Bio.Restriction import PstI\n", + "\n", + "cut_record2 = record.cut(PstI)\n", + "\n", + "print(cut_record2[0])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, if a cut does not overlap with the feature, the feature is retained on the `Dseqrecord`. For instance, if we go back to the first example given by the EcoRI cut, example_gene has been retained after cutting. For more information on Features, please refer to the `Dseq_Feature` documentations." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/notebooks/readme_example.ipynb b/docs/notebooks/readme_example.ipynb index 485c4891..54f67662 100644 --- a/docs/notebooks/readme_example.ipynb +++ b/docs/notebooks/readme_example.ipynb @@ -1,366 +1,366 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## README Example\n", - "\n", - "This notebook contains the example shown in the README file.\n", - "\n", - "\n", - " \"Open\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Install pydna (only when running on Colab)\n", - "import sys\n", - "if 
'google.colab' in sys.modules:\n", - " %pip install pydna\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(-60)\n", - "\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0m\n", - "TACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTA" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pydna.dseqrecord import Dseqrecord\n", - "# Let's create a DNA sequence record, and add a feature to it\n", - "dsr = Dseqrecord(\"ATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\")\n", - "dsr.add_feature(x=0, y=60,type=\"gene\", label=\"my_gene\") # We add a feature to highlight the sequence as a gene\n", - "dsr.figure()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LOCUS name 60 bp DNA linear UNK 01-JAN-1980\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - " misc 1..60\n", - " /type=\"gene\"\n", - " /label=\"my_gene\"\n", - "ORIGIN\n", - " 1 atgcaaacag taatgatgga tgacattcaa agcactgatt ctattgctga aaaagataat\n", - "//\n" - ] - } - ], - "source": [ - "# This is how it would look as a genbank file\n", - "print(dsr.format(\"genbank\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "forward primer: ATGCAAACAGTAATGATGGA\n", - "reverse primer: ATTATCTTTTTCAGCAATAGAATCA\n" - ] - }, - { - "data": { - "text/plain": [ - "5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n", - " |||||||||||||||||||||||||\n", - " 3ACTAAGATAACGACTTTTTCTATTA5\n", - "5ATGCAAACAGTAATGATGGA3\n", - " ||||||||||||||||||||\n", - 
"3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Now let's design primers to amplify it\n", - "from pydna.design import primer_design\n", - "# limit is the minimum length of the primer, target_tm is the desired melting temperature of the primer\n", - "amplicon = primer_design(dsr, limit=13, target_tm=55)\n", - "# Let's print the primers, and a figure that shows where they align with the template sequence\n", - "print(\"forward primer:\", amplicon.forward_primer.seq)\n", - "print(\"reverse primer:\", amplicon.reverse_primer.seq)\n", - "amplicon.figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " 5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n", - " |||||||||||||||||||||||||\n", - " 3ACTAAGATAACGACTTTTTCTATTACCTAGGtttt5\n", - "5ccccGGATCCATGCAAACAGTAATGATGGA3\n", - " ||||||||||||||||||||\n", - " 3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's say we don't want to just amplify it, but we want to add restriction sites to it!\n", - "\n", - "from pydna.amplify import pcr\n", - "# We add the restriction sites to the primers\n", - "forward_primer = \"ccccGGATCC\" + amplicon.forward_primer\n", - "reverse_primer = \"ttttGGATCC\" + amplicon.reverse_primer\n", - "\n", - "# We do the PCR\n", - "pcr_product = pcr(forward_primer, reverse_primer, dsr)\n", - "# The PCR product is of class `Amplicon`, a subclass of `Dseqrecord`.\n", - "# When doing a figure, it shows where primers anneal.\n", - "pcr_product.figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(-80)\n", - 
"ccccGGATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mGGATCCaaaa\n", - "ggggCCTAGGTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAGGtttt" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# If we want to see the sequence more clearly, we can turn it into a `Dseqrecord`\n", - "pcr_product = Dseqrecord(pcr_product)\n", - "pcr_product.figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dseqrecord(-9)\n", - "\u001b[48;5;11m\u001b[0mccccG \n", - "ggggCCTAG\n", - "\n", - "Dseqrecord(-70)\n", - "GATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mG \n", - " GTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAG\n", - "\n", - "Dseqrecord(-9)\n", - "\u001b[48;5;11m\u001b[0mGATCCaaaa\n", - " Gtttt\n" - ] - } - ], - "source": [ - "from Bio.Restriction import BamHI # cuts GGATCC\n", - "# a, payload, c are the cut fragments\n", - "a, payload, c = pcr_product.cut (BamHI)\n", - "print(a.figure())\n", - "print()\n", - "print (payload.figure())\n", - "print()\n", - "print(c.figure())\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(o50)\n", - "\u001b[48;5;11m\u001b[0maatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\n", - "ttacaaaaagggaGGGCCCgttttaTCTAGAacgatacgtagtagctaga" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# We create a circular vector to insert the amplicon into\n", - "vector = Dseqrecord(\"aatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\", circular=True, name=\"vect\")\n", - "vector.figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "Dseqrecord(o116)\n", - "aatgtttttccctCCCGGGcaaaatAGATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mGGATCTtgctatgcatcatcgatct\n", - "ttacaaaaagggaGGGCCCgttttaTCTAGGTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAGAacgatacgtagtagctaga" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from Bio.Restriction import BglII # cuts AGATCT\n", - "linear_vector_bgl = vector.cut(BglII)[0] # Linearize the vector at BglII (produces only one fragment)\n", - "\n", - "# Ligate the fragment of interest to the vector, and call looped() to circularize it\n", - "# synced is used to place the origin coordinate (0) in the same place for rec_vector and vector\n", - "rec_vector= (linear_vector_bgl + payload).looped().synced(vector)\n", - "rec_vector.figure()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " -|fragment_A|13\n", - "| \\/\n", - "| /\\\n", - "| 13|fragment_B|13\n", - "| \\/\n", - "| /\\\n", - "| 13|fragment_C|13\n", - "| \\/\n", - "| /\\\n", - "| 13-\n", - "| |\n", - " --------------------------------------------" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's simulate a Gibson assembly\n", - "from pydna.assembly import Assembly\n", - "\n", - "fragments = [\n", - " Dseqrecord('aatgtttttccctCACTACGtgctatgcatcat', name=\"fragment_A\"),\n", - " Dseqrecord('tgctatgcatcatCTATGGAcactctaataatg', name=\"fragment_B\"),\n", - " Dseqrecord('cactctaataatgTTACATAaatgtttttccct', name=\"fragment_C\"),\n", - "]\n", - "\n", - "# limit is the min. 
homology length between fragments in the assembly\n", - "asm = Assembly(fragments, limit=10)\n", - "\n", - "# From the assembly object, which can generate all possible products, get a circular\n", - "product, *rest = asm.assemble_circular()\n", - "\n", - "# We can print a figure that shows the overlaps between fragments\n", - "product.figure()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dseqrecord(o60)\n", - "\u001b[48;5;11m\u001b[0maatgtttttccctCACTACGtgctatgcatcatCTATGGAcactctaataatgTTACATA\n", - "ttacaaaaagggaGTGATGCacgatacgtagtaGATACCTgtgagattattacAATGTAT" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Or show the final sequence:\n", - "Dseqrecord(product).figure()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.6" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## README Example\n", + "\n", + "This notebook contains the example shown in the README file.\n", + "\n", + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna (only when running on Colab)\n", + "import sys\n", + "if 'google.colab' in sys.modules:\n", + " %pip install pydna\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(-60)\n", + "\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0m\n", + 
"TACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTA" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "# Let's create a DNA sequence record, and add a feature to it\n", + "dsr = Dseqrecord(\"ATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\")\n", + "dsr.add_feature(x=0, y=60,type=\"gene\", label=\"my_gene\") # We add a feature to highlight the sequence as a gene\n", + "dsr.figure()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOCUS name 60 bp DNA linear UNK 01-JAN-1980\n", + "DEFINITION description.\n", + "ACCESSION id\n", + "VERSION id\n", + "KEYWORDS .\n", + "SOURCE .\n", + " ORGANISM .\n", + " .\n", + "FEATURES Location/Qualifiers\n", + " misc 1..60\n", + " /type=\"gene\"\n", + " /label=\"my_gene\"\n", + "ORIGIN\n", + " 1 atgcaaacag taatgatgga tgacattcaa agcactgatt ctattgctga aaaagataat\n", + "//\n" + ] + } + ], + "source": [ + "# This is how it would look as a genbank file\n", + "print(dsr.format(\"genbank\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "forward primer: ATGCAAACAGTAATGATGGA\n", + "reverse primer: ATTATCTTTTTCAGCAATAGAATCA\n" + ] }, - "nbformat": 4, - "nbformat_minor": 2 + { + "data": { + "text/plain": [ + "5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n", + " |||||||||||||||||||||||||\n", + " 3ACTAAGATAACGACTTTTTCTATTA5\n", + "5ATGCAAACAGTAATGATGGA3\n", + " ||||||||||||||||||||\n", + "3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Now let's design primers to amplify it\n", + "from pydna.design import primer_design\n", + "# limit is the minimum length of the 
primer, target_tm is the desired melting temperature of the primer\n", + "amplicon = primer_design(dsr, limit=13, target_tm=55)\n", + "# Let's print the primers, and a figure that shows where they align with the template sequence\n", + "print(\"forward primer:\", amplicon.forward_primer.seq)\n", + "print(\"reverse primer:\", amplicon.reverse_primer.seq)\n", + "amplicon.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " 5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n", + " |||||||||||||||||||||||||\n", + " 3ACTAAGATAACGACTTTTTCTATTACCTAGGtttt5\n", + "5ccccGGATCCATGCAAACAGTAATGATGGA3\n", + " ||||||||||||||||||||\n", + " 3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's say we don't want to just amplify it, but we want to add restriction sites to it!\n", + "\n", + "from pydna.amplify import pcr\n", + "# We add the restriction sites to the primers\n", + "forward_primer = \"ccccGGATCC\" + amplicon.forward_primer\n", + "reverse_primer = \"ttttGGATCC\" + amplicon.reverse_primer\n", + "\n", + "# We do the PCR\n", + "pcr_product = pcr(forward_primer, reverse_primer, dsr)\n", + "# The PCR product is of class `Amplicon`, a subclass of `Dseqrecord`.\n", + "# When doing a figure, it shows where primers anneal.\n", + "pcr_product.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(-80)\n", + "ccccGGATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mGGATCCaaaa\n", + "ggggCCTAGGTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAGGtttt" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# If we want to see the sequence more clearly, we can turn it into 
a `Dseqrecord`\n", + "pcr_product = Dseqrecord(pcr_product)\n", + "pcr_product.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord(-9)\n", + "\u001b[48;5;11m\u001b[0mccccG \n", + "ggggCCTAG\n", + "\n", + "Dseqrecord(-70)\n", + "GATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mG \n", + " GTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAG\n", + "\n", + "Dseqrecord(-9)\n", + "\u001b[48;5;11m\u001b[0mGATCCaaaa\n", + " Gtttt\n" + ] + } + ], + "source": [ + "from Bio.Restriction import BamHI # cuts GGATCC\n", + "# a, payload, c are the cut fragments\n", + "a, payload, c = pcr_product.cut (BamHI)\n", + "print(a.figure())\n", + "print()\n", + "print (payload.figure())\n", + "print()\n", + "print(c.figure())\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(o50)\n", + "\u001b[48;5;11m\u001b[0maatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\n", + "ttacaaaaagggaGGGCCCgttttaTCTAGAacgatacgtagtagctaga" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We create a circular vector to insert the amplicon into\n", + "vector = Dseqrecord(\"aatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\", circular=True, name=\"vect\")\n", + "vector.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(o116)\n", + "aatgtttttccctCCCGGGcaaaatAGATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mGGATCTtgctatgcatcatcgatct\n", + "ttacaaaaagggaGGGCCCgttttaTCTAGGTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAGAacgatacgtagtagctaga" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "from Bio.Restriction import BglII # cuts AGATCT\n", + "linear_vector_bgl = vector.cut(BglII)[0] # Linearize the vector at BglII (produces only one fragment)\n", + "\n", + "# Ligate the fragment of interest to the vector, and call looped() to circularize it\n", + "# synced is used to place the origin coordinate (0) in the same place for rec_vector and vector\n", + "rec_vector= (linear_vector_bgl + payload).looped().synced(vector)\n", + "rec_vector.figure()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " -|fragment_A|13\n", + "| \\/\n", + "| /\\\n", + "| 13|fragment_B|13\n", + "| \\/\n", + "| /\\\n", + "| 13|fragment_C|13\n", + "| \\/\n", + "| /\\\n", + "| 13-\n", + "| |\n", + " --------------------------------------------" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's simulate a Gibson assembly\n", + "from pydna.assembly import Assembly\n", + "\n", + "fragments = [\n", + " Dseqrecord('aatgtttttccctCACTACGtgctatgcatcat', name=\"fragment_A\"),\n", + " Dseqrecord('tgctatgcatcatCTATGGAcactctaataatg', name=\"fragment_B\"),\n", + " Dseqrecord('cactctaataatgTTACATAaatgtttttccct', name=\"fragment_C\"),\n", + "]\n", + "\n", + "# limit is the min. 
homology length between fragments in the assembly\n", + "asm = Assembly(fragments, limit=10)\n", + "\n", + "# From the assembly object, which can generate all possible products, get a circular\n", + "product, *rest = asm.assemble_circular()\n", + "\n", + "# We can print a figure that shows the overlaps between fragments\n", + "product.figure()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dseqrecord(o60)\n", + "\u001b[48;5;11m\u001b[0maatgtttttccctCACTACGtgctatgcatcatCTATGGAcactctaataatgTTACATA\n", + "ttacaaaaagggaGTGATGCacgatacgtagtaGATACCTgtgagattattacAATGTAT" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Or show the final sequence:\n", + "Dseqrecord(product).figure()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } From 43d75e66609f263ef6c900595a5d3701e1125896 Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Wed, 15 Jan 2025 16:36:16 +0000 Subject: [PATCH 8/8] fix author string for poetry 1.8 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 35dcb88b..6472b3e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ readme = "README.md" Changelog = "https://github.com/pydna-group/pydna/blob/master/docs/CHANGELOG.md#changelog" [tool.poetry] description = "Representing double stranded DNA and functions for simulating cloning and homologous recombination between DNA molecules." -authors = ["Björn F. 
Johansson", "Manuel Lera-Ramirez"] +authors = ["Björn F. Johansson ", "Manuel Lera-Ramirez "] documentation = "https://pydna-group.github.io/pydna" homepage = "https://github.com/pydna-group/pydna#-pydna" license = "BSD"