diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 0000000..78fda48
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,71 @@
+FROM python:3.9-slim@sha256:5f0192a4f58a6ce99f732fe05e3b3d00f12ae62e183886bca3ebe3d202686c7f
+
+# Run every RUN step under bash with pipefail so that a failure on the left side
+# of a pipe (e.g. `curl ... | gpg --dearmor`) fails the build instead of being masked.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+ENV PATH=/usr/local/bin:$PATH
+ENV PYTHON_VERSION=3.9.17
+
+# Create the `vscode` user and install Docker Engine from Docker's apt repository.
+RUN \
+    adduser --system --disabled-password --shell /bin/bash vscode && \
+    # install docker
+    apt-get update && \
+    apt-get install ca-certificates curl gnupg lsb-release -y && \
+    mkdir -m 0755 -p /etc/apt/keyrings && \
+    curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \
+    echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null && \
+    apt-get update && \
+    apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y && \
+    usermod -aG docker vscode && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Developer tooling, passwordless sudo, and bash completion/prompt for `vscode`.
+RUN \
+    # dev setup
+    apt-get update && \
+    apt-get install sudo git bash-completion graphviz default-mysql-client s3fs procps -y && \
+    usermod -aG sudo vscode && \
+    echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \
+    pip install --no-cache-dir --upgrade black pip nbconvert && \
+    echo '. /etc/bash_completion' >> /home/vscode/.bashrc && \
+    echo 'export PS1="\[\e[32;1m\]\u\[\e[m\]@\[\e[34;1m\]\H\[\e[m\]:\[\e[33;1m\]\w\[\e[m\]$ "' >> /home/vscode/.bashrc && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY ./ /tmp/element-moseq/
+
+# Install the package with its extras, then delete the copied source tree;
+# devcontainer.json's onCreateCommand runs `pip install -e .` again in the workspace.
+RUN \
+    # pipeline dependencies
+    # refresh the apt index in this layer instead of relying on lists left by a cached earlier layer
+    apt-get update && \
+    apt-get install gcc g++ ffmpeg libsm6 libxext6 libgl1 libegl1 -y && \
+    pip install --no-cache-dir -e "/tmp/element-moseq[elements,tests]" && \
+    # clean up
+    rm -rf /tmp/element-moseq/ && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install the CPU build of JAX used by Keypoint-MoSeq
+RUN pip install --no-cache-dir "jax[cpu]==0.3.22" -f https://storage.googleapis.com/jax-releases/jax_releases.html
+
+# Development-only database defaults: docker-compose-db.yaml uses DJ_PASS as the
+# MySQL root password, and the dev compose file maps DJ_HOST to 127.0.0.1.
+ENV DJ_HOST=fakeservices.datajoint.io
+ENV DJ_USER=root
+ENV DJ_PASS=simple
+
+ENV KPMS_ROOT_DATA_DIR=/workspaces/element-moseq/example_data/inbox
+ENV KPMS_ROOT_OUTPUT_DIR=/workspaces/element-moseq/example_data/outbox
+ENV DATABASE_PREFIX=neuro_
+
+# NOTE(review): nothing in this Dockerfile creates /opt/conda/lib - confirm this entry is still needed.
+ENV LD_LIBRARY_PATH="/lib:/opt/conda/lib"
+
+USER vscode
+# Exec-form CMD; `rm -f` removes a leftover pid file without erroring when none exists.
+CMD ["bash", "-c", "sudo rm -f /var/run/docker.pid; sudo dockerd"]
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..5717d05
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,30 @@
+{
+  "name": "Environment + Data",
+  "dockerComposeFile": "docker-compose.yaml",
+  "service": "app",
+  "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
+  "remoteEnv": {
+    "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}"
+  },
+  "onCreateCommand": "mkdir -p ${KPMS_ROOT_DATA_DIR} && pip install -e .",
+  "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${KPMS_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1",
+  "hostRequirements": {
+    "cpus": 4,
+    "memory": "8gb",
+    "storage": "32gb"
+  },
+  "forwardPorts": [
+    3306
+  ],
+  "customizations": {
+    "settings": {
+      "python.pythonPath": "/usr/local/bin/python"
+    },
+    "vscode": {
+      "extensions": [
+        "ms-python.python@2023.8.0",
+        "ms-toolsai.jupyter@2023.3.1201040234"
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml
new file mode 100644
index 0000000..4831410
--- /dev/null
+++ b/.devcontainer/docker-compose.yaml
@@ -0,0 +1,25 @@
+version: "3"
+services:
+  app:
+    cpus: 4
+    mem_limit: 8g
+    build:
+      context: ..
+      dockerfile: ./.devcontainer/Dockerfile
+    # image: datajoint/element_moseq:latest
+    extra_hosts:
+      - fakeservices.datajoint.io:127.0.0.1
+    environment:
+      - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-moseq/v1
+    devices:
+      - /dev/fuse
+    cap_add:
+      - SYS_ADMIN
+    security_opt:
+      - apparmor:unconfined
+    volumes:
+      - ..:/workspaces/element-moseq:cached
+      - docker_data:/var/lib/docker # persist docker images
+    privileged: true # only because of dind
+volumes:
+  docker_data:
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..31fe9fc
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,39 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: 'bug'
+assignees: ''
+
+---
+
+## Bug Report
+
+### Description
+
+A clear and concise description of what is the overall operation that is intended to be
+performed that resulted in an error.
+
+### Reproducibility
+Include:
+- OS (WIN | MACOS | Linux)
+- DataJoint Element Version
+- MySQL Version
+- MySQL Deployment Strategy (local-native | local-docker | remote)
+- Minimum number of steps to reliably reproduce the issue
+- Complete error stack as a result of evaluating the above steps
+
+### Expected Behavior
+A clear and concise description of what you expected to happen.
+
+### Screenshots
+If applicable, add screenshots to help explain your problem.
+
+### Additional Research and Context
+Add any additional research or context that was conducted in creating this report.
+ +For example: +- Related GitHub issues and PR's either within this repository or in other relevant + repositories. +- Specific links to specific lines or a focus within source code. +- Relevant summary of Maintainers development meetings, milestones, projects, etc. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..b3d197d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: DataJoint Contribution Guideline + url: https://datajoint.com/docs/community/contribute/ + about: Please make sure to review the DataJoint Contribution Guidelines \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..1f2b784 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,57 @@ +--- +name: Feature request +about: Suggest an idea for a new feature +title: '' +labels: 'enhancement' +assignees: '' + +--- + +## Feature Request + +### Problem + +A clear and concise description how this idea has manifested and the context. Elaborate +on the need for this feature and/or what could be improved. Ex. I'm always frustrated +when [...] + +### Requirements + +A clear and concise description of the requirements to satisfy the new feature. Detail +what you expect from a successful implementation of the feature. Ex. When using this +feature, it should [...] + +### Justification + +Provide the key benefits in making this a supported feature. Ex. Adding support for this +feature would ensure [...] + +### Alternative Considerations + +Do you currently have a work-around for this? Provide any alternative solutions or +features you've considered. + +### Related Errors +Add any errors as a direct result of not exposing this feature. 
+ +Please include steps to reproduce provided errors as follows: +- OS (WIN | MACOS | Linux) +- DataJoint Element Version +- MySQL Version +- MySQL Deployment Strategy (local-native | local-docker | remote) +- Minimum number of steps to reliably reproduce the issue +- Complete error stack as a result of evaluating the above steps + +### Screenshots +If applicable, add screenshots to help explain your feature. + +### Additional Research and Context +Add any additional research or context that was conducted in creating this feature request. + +For example: +- Related GitHub issues and PR's either within this repository or in other relevant + repositories. +- Specific links to specific lines or a focus within source code. +- Relevant summary of Maintainers development meetings, milestones, projects, etc. +- Any additional supplemental web references or links that would further justify this + feature request. diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..4a5f2cb --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,27 @@ +name: Release +on: + workflow_dispatch: +jobs: + make_github_release: + uses: datajoint/.github/.github/workflows/make_github_release.yaml@main + pypi_release: + needs: make_github_release + uses: datajoint/.github/.github/workflows/pypi_release.yaml@main + secrets: + TWINE_USERNAME: ${{secrets.TWINE_USERNAME}} + TWINE_PASSWORD: ${{secrets.TWINE_PASSWORD}} + with: + UPLOAD_URL: ${{needs.make_github_release.outputs.release_upload_url}} + mkdocs_release: + uses: datajoint/.github/.github/workflows/mkdocs_release.yaml@main + permissions: + contents: write + devcontainer-build: + uses: datajoint/.github/.github/workflows/devcontainer-build.yaml@main + devcontainer-publish: + needs: + - devcontainer-build + uses: datajoint/.github/.github/workflows/devcontainer-publish.yaml@main + secrets: + DOCKERHUB_USERNAME: ${{secrets.DOCKERHUB_USERNAME}} + DOCKERHUB_TOKEN: 
${{secrets.DOCKERHUB_TOKEN_FOR_ELEMENTS}} \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..842b9db --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,37 @@ +name: Test +on: + push: + pull_request: + workflow_dispatch: + schedule: + - cron: "0 8 * * 1" +jobs: + devcontainer-build: + uses: datajoint/.github/.github/workflows/devcontainer-build.yaml@main + tests: + runs-on: ubuntu-latest + strategy: + matrix: + py_ver: ["3.9", "3.10"] + mysql_ver: ["8.0", "5.7"] + include: + - py_ver: "3.8" + mysql_ver: "5.7" + - py_ver: "3.7" + mysql_ver: "5.7" + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{matrix.py_ver}} + uses: actions/setup-python@v4 + with: + python-version: ${{matrix.py_ver}} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 "black[jupyter]" + - name: Run style tests + run: | + python_version=${{matrix.py_ver}} + black element_moseq --check --verbose --target-version py${python_version//.} + black notebooks --check --verbose --target-version py${python_version//.} + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..91f9376 --- /dev/null +++ b/.gitignore @@ -0,0 +1,133 @@ +# User data +.DS_Store + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution, packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +.idea/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete*.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy +scratchpaper.* + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ +./.env + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# datajoint +dj_local_c*.json +dj_*.y*ml +temp* +temp/* + +# docs +/docs/site +/docs/src/tutorials/*ipynb + +# emacs +**/*~ +**/#*# +**/.#* + +# Codespaces +example_data + +#nwb export +*nwb + +# vscode +*.code-workspace +.vscode diff --git a/.markdownlint.yaml b/.markdownlint.yaml new file mode 100644 index 0000000..0e9ceeb --- /dev/null +++ b/.markdownlint.yaml @@ -0,0 +1,17 @@ +# Markdown Linter configuration for docs +# https://github.com/DavidAnson/markdownlint +# https://github.com/DavidAnson/markdownlint/blob/main/doc/Rules.md +MD009: false # permit trailing spaces +MD007: false # List indenting - permit 4 spaces +MD013: + line_length: "88" # Line length limits + tables: false # disable for tables + headings: false # disable for headings +MD030: false # Number of spaces after a list +MD033: # HTML elements allowed + allowed_elements: + - "figure" + - "figcaption" +MD034: false # Permit bare URLs +MD031: false # Spacing w/code blocks. Conflicts with `??? Note` and code tab styling +MD046: false # Spacing w/code blocks. Conflicts with `??? 
Note` and code tab styling diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..e991fd6 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,59 @@ +default_stages: [commit, push] +exclude: (^.github/|^docs/|^images/|^notebooks/|^tests/) +# Current tests/__init__ violates many flake8. Excluding pending change to conftest + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files # prevent giant files from being committed + - id: requirements-txt-fixer + - id: mixed-line-ending + args: ["--fix=lf"] + description: Forces to replace line ending by the UNIX 'lf' character. + + # black + - repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black + - id: black-jupyter + args: + - --line-length=88 + + # isort + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: ["--profile", "black"] + description: Sorts imports in an alphabetical order + + # flake8 + - repo: https://github.com/pycqa/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + args: # arguments to configure flake8 + # making isort line length compatible with black + - "--max-line-length=88" + - "--max-complexity=18" + - "--select=B,C,E,F,W,T4,B9" + + # these are errors that will be ignored by flake8 + # https://www.flake8rules.com/rules/{code}.html + - "--ignore=E203,E501,W503,W605,E402" + # E203 - Colons should not have any space before them. + # Needed for list indexing + # E501 - Line lengths are recommended to be no greater than 79 characters. + # Needed as we conform to 88 + # W503 - Line breaks should occur after the binary operator. + # Needed because not compatible with black + # W605 - a backslash-character pair that is not a valid escape sequence now + # generates a DeprecationWarning. This will eventually become a SyntaxError. 
+ # Needed because we use \d as an escape sequence + # E402 - Place module level import at the top. + # Needed to prevent circular import error diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..628a4a6 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,20 @@ +# Changelog + +Observes [Semantic Versioning](https://semver.org/spec/v2.0.0.html) standard and +[Keep a Changelog](https://keepachangelog.com/en/1.0.0/) convention. + +## [0.1.0] - 2024-03-20 + ++ Add - `CHANGELOG` and version for first release ++ Add - DevContainer configuration for GitHub Codespaces ++ Add - Updated documentation in `docs` for schemas and tutorial ++ Add - `kpms_reader` readers ++ Add - `element_moseq` pipeline architecture and design containing `kpms_pca` and `kpms_model` modules ++ Add - `images` with flowchart and pipeline images ++ Add - `tutorial.ipynb` consistent across DataJoint Elements that can be launched using GitHub Codespaces ++ Add - `tutorial_pipeline.py` script for notebooks to import and activate schemas ++ Add - spelling, markdown, and pre-commit config files ++ Add - GitHub Actions that call reusable workflows in the `datajoint/.github` repository ++ Add - `LICENSE`, `CONTRIBUTING`, `CODE_OF_CONDUCT` ++ Add - `README` consistent across DataJoint Elements ++ Add - `setup.py` with `extras_require` and `tests` features diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..0502528 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. 
+ +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. 
+Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +[Support@DataJoint.com](mailto:support@datajoint.com). +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. 
No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..2bd0f49 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,5 @@ +# Contribution Guidelines + +This project follows the +[DataJoint Contribution Guidelines](https://datajoint.com/docs/about/contribute/). +Please refer to the link for full details. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6872305 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 DataJoint + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 8b13789..c270049 100644 --- a/README.md +++ b/README.md @@ -1 +1,75 @@ +# DataJoint Element for Motion Sequencing with Keypoint-MoSeq +DataJoint Element for advanced motion sequencing of animal behavior using [Keypoint-MoSeq](https://dattalab.github.io/moseq2-website/index.html). This Element facilitates Keypoint-MoSeq analysis, employing an advanced generative model to automatically identify behavioral modules or "syllables" from keypoint data extracted from conventional video recordings of animal behavior, eliminating the need for manual intervention. + +DataJoint Elements collectively standardize and automate data collection and analysis for neuroscience experiments. 
Each Element is a modular pipeline for data storage and processing with corresponding database tables that can be combined with other Elements to assemble a fully functional pipeline. This repository also provides a tutorial environment and notebooks to learn the pipeline. + +## Experiment Flowchart + +![flowchart](https://raw.githubusercontent.com/datajoint/element-moseq/main/images/flowchart.svg) + +## Data Pipeline Diagram + +![pipeline](https://raw.githubusercontent.com/datajoint/element-moseq/main/images/pipeline.svg) + +## Getting Started + ++ Please fork this repository. + ++ Clone the repository to your computer. + + ```bash + git clone https://github.com/<enter_github_username>/element-moseq + ``` + ++ Install with `pip`: + + ```bash + pip install -e . + ``` + ++ [Interactive tutorial on GitHub Codespaces](https://github.com/datajoint/element-moseq#interactive-tutorial) + ++ [Documentation](https://datajoint.com/docs/elements/element-moseq) + +## Support + ++ If you need help getting started or run into any errors, please open a GitHub Issue +or contact our team by email at support@datajoint.com. + +## Interactive Tutorial + ++ The easiest way to learn about DataJoint Elements is to use the tutorial notebooks within the included interactive environment configured using [Dev Container](https://containers.dev/). + +### Launch Environment + +Here are some options that provide a great experience: + +- (*recommended*) Cloud-based Environment + - Launch using [GitHub Codespaces](https://github.com/features/codespaces) using the `+` option which will `Create codespace on main` in the codebase repository on your fork with default options. For more control, see the `...` where you may create `New with options...`. + - Build time for a codespace is a few minutes. This is done infrequently and cached for convenience. + - Start time for a codespace is less than 1 minute. This will pull the built codespace from cache when you need it. 
+ - *Tip*: Each month, GitHub renews a [free-tier](https://docs.github.com/en/billing/managing-billing-for-github-codespaces/about-billing-for-github-codespaces#monthly-included-storage-and-core-hours-for-personal-accounts) quota of compute and storage. Typically we run into the storage limits before anything else since Codespaces consume storage while stopped. It is best to delete Codespaces when not actively in use and recreate when needed. We'll soon be creating prebuilds to avoid larger build times. Once any portion of your quota is reached, you will need to wait for it to be reset at the end of your cycle or add billing info to your GitHub account to handle overages. + - *Tip*: GitHub auto names the codespace but you can rename the codespace so that it is easier to identify later. + +- Local Environment + > *Note: Access to example data is currently limited to MacOS and Linux due to the s3fs utility. Windows users are recommended to use the above environment.* + - Install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) + - Install [Docker](https://docs.docker.com/get-docker/) + - Install [VSCode](https://code.visualstudio.com/) + - Install the VSCode [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) + - `git clone` the codebase repository and open it in VSCode + - Use the `Dev Containers extension` to `Reopen in Container` (More info is in the `Getting started` included with the extension.) + +You will know your environment has finished loading once you either see a terminal open related to `Running postStartCommand` with a final message of `Done` or the `README.md` is opened in `Preview`. + +Once the environment has launched, please run the following command in the terminal: +``` +MYSQL_VER=8.0 docker compose -f docker-compose-db.yaml up --build -d +``` + +### Instructions + +1. 
We recommend you start by navigating to the `notebooks` directory on the left panel and go through the `tutorial.ipynb` Jupyter notebook. Execute the cells in the notebook to begin your walkthrough of the tutorial. + +1. Once you are done, see the options available to you in the menu in the bottom-left corner. For example, in Codespace you will have an option to `Stop Current Codespace` but when running Dev Container on your own machine the equivalent option is `Reopen folder locally`. By default, GitHub will also automatically stop the Codespace after 30 minutes of inactivity. Once the Codespace is no longer being used, we recommend deleting the Codespace. diff --git a/cspell.json b/cspell.json new file mode 100644 index 0000000..325eacf --- /dev/null +++ b/cspell.json @@ -0,0 +1,210 @@ +// cSpell Settings +//https://github.com/streetsidesoftware/vscode-spell-checker +{ + "version": "0.2", // Version of the setting file. Always 0.2 + "language": "en", // language - current active spelling language + "enabledLanguageIds": [ + "markdown", + "yaml", + "python" + ], + // flagWords - list of words to be always considered incorrect + // This is useful for offensive words and common spelling errors. 
+ // For example "hte" should be "the" + "flagWords": [], + "allowCompoundWords": true, + "ignorePaths": [ + "./element_moseq.egg-info/*", + "./images/*" + ], + "words": [ + "acorr", + "aggr", + "Alessio", + "Andreas", + "apmeta", + "arange", + "arithmatex", + "asarray", + "astype", + "autocorrelogram", + "Axona", + "bbins", + "bdist", + "Binarize", + "bouton", + "Brody", + "Bruker", + "bshift", + "Buccino", + "catgt", + "cbar", + "cbin", + "cdat", + "chans", + "Chans", + "chns", + "Clust", + "clusterings", + "cmap", + "cnmf", + "correlogram", + "correlograms", + "curations", + "DANDI", + "decomp", + "deconvolution", + "DISTRO", + "djbase", + "dtype", + "ecephys", + "Eftychios", + "electrophysiogical", + "elif", + "Ephys", + "fluo", + "fneu", + "Fneu", + "gblcar", + "gfix", + "Giovannucci", + "Hakan", + "hdmf", + "HHMI", + "hstack", + "ibllib", + "ifnull", + "imax", + "Imax", + "IMAX", + "imec", + "imread", + "imro", + "imrotbl", + "imshow", + "Inan", + "inlinehilite", + "iplane", + "ipynb", + "ipywidgets", + "iscell", + "Kavli", + "kcoords", + "Klusta", + "Kwik", + "lfmeta", + "linenums", + "masky", + "mathjax", + "mdict", + "Mesoscale", + "mesoscope", + "mkdocs", + "mkdocstrings", + "Moser", + "mtscomp", + "nblocks", + "nchan", + "Nchan", + "nchannels", + "ndarray", + "ndepths", + "ndim", + "ndimage", + "Neuralynx", + "NEURO", + "neuroconv", + "Neurodata", + "Neurolabware", + "neuropil", + "Neuropil", + "Neuropix", + "neuropixel", + "NeuroPixels", + "nfields", + "nframes", + "npix", + "nplanes", + "nrois", + "NTNU", + "nwbfile", + "NWBHDF", + "oebin", + "openephys", + "openpyxl", + "Pachitariu", + "paramsets", + "phylog", + "plotly", + "Pnevmatikakis", + "PSTH", + "pykilosort", + "pymdownx", + "pynwb", + "pyopenephys", + "pyplot", + "pytest", + "quantile", + "Reimer", + "repolarization", + "Roboto", + "roidetect", + "rois", + "ROIs", + "RRID", + "Rxiv", + "Sasaki", + "sbxreader", + "scipy", + "sdist", + "sess", + "SGLX", + "Shen", + "Siegle", + "Sitonic", + 
"spikeglx", + "spkcount", + "spks", + "Stereotaxic", + "Sutter", + "tcat", + "tickvals", + "tofile", + "Tolias", + "tqdm", + "usecs", + "usedb", + "Vidrio's", + "vline", + "vmax", + "Vmax", + "voxel", + "xanchor", + "xaxes", + "xaxis", + "xblock", + "xcoords", + "xcorr", + "xlabel", + "xlim", + "xoff", + "xpix", + "XPOS", + "xtick", + "yanchor", + "Yatsenko", + "yaxes", + "yaxis", + "yblock", + "ycoord", + "ycoords", + "ylabel", + "ylim", + "yoff", + "ypix", + "YPOS", + "yref", + "yticks", + "zpix" + ] +} \ No newline at end of file diff --git a/docker-compose-db.yaml b/docker-compose-db.yaml new file mode 100644 index 0000000..1d453c8 --- /dev/null +++ b/docker-compose-db.yaml @@ -0,0 +1,15 @@ +# MYSQL_VER=8.0 docker compose -f docker-compose-db.yaml up --build +version: "3" +services: + db: + restart: always + image: datajoint/mysql:${MYSQL_VER} + environment: + - MYSQL_ROOT_PASSWORD=${DJ_PASS} + ports: + - "3306:3306" + healthcheck: + test: [ "CMD", "mysqladmin", "ping", "-h", "localhost" ] + timeout: 15s + retries: 10 + interval: 15s diff --git a/docs/.docker/Dockerfile b/docs/.docker/Dockerfile new file mode 100644 index 0000000..340dea5 --- /dev/null +++ b/docs/.docker/Dockerfile @@ -0,0 +1,17 @@ +FROM datajoint/miniconda3:4.10.3-py3.9-alpine +ARG PACKAGE +WORKDIR /main +COPY --chown=anaconda:anaconda ./docs/.docker/apk_requirements.txt ${APK_REQUIREMENTS} +COPY --chown=anaconda:anaconda ./docs/.docker/pip_requirements.txt ${PIP_REQUIREMENTS} +RUN \ + umask u+rwx,g+rwx,o-rwx && \ + /entrypoint.sh echo "Dependencies installed" && \ + rm ${APK_REQUIREMENTS} ${PIP_REQUIREMENTS} && \ + git config --global user.name "GitHub Action" && \ + git config --global user.email "action@github.com"&& \ + git config --global pull.rebase false && \ + git init +COPY --chown=anaconda:anaconda ./${PACKAGE} /main/${PACKAGE} +COPY --chown=anaconda:anaconda ./docs/mkdocs.yaml /main/docs/mkdocs.yaml +COPY --chown=anaconda:anaconda ./docs/src /main/docs/src +COPY 
--chown=anaconda:anaconda ./CHANGELOG.md /main/ \ No newline at end of file diff --git a/docs/.docker/apk_requirements.txt b/docs/.docker/apk_requirements.txt new file mode 100644 index 0000000..0899c29 --- /dev/null +++ b/docs/.docker/apk_requirements.txt @@ -0,0 +1 @@ +git \ No newline at end of file diff --git a/docs/.docker/pip_requirements.txt b/docs/.docker/pip_requirements.txt new file mode 100644 index 0000000..ae44fb5 --- /dev/null +++ b/docs/.docker/pip_requirements.txt @@ -0,0 +1,12 @@ +mkdocs-material +mkdocs-redirects +mkdocstrings +mkdocstrings-python +mike +mdx-truly-sane-lists +mkdocs-gen-files +mkdocs-literate-nav +mkdocs-exclude-search +mkdocs-markdownextradata-plugin +mkdocs-jupyter +mkdocs-section-index \ No newline at end of file diff --git a/docs/docker-compose.yaml b/docs/docker-compose.yaml new file mode 100644 index 0000000..1eb04eb --- /dev/null +++ b/docs/docker-compose.yaml @@ -0,0 +1,54 @@ +# MODE="LIVE|QA|PUSH" PACKAGE=element_moseq UPSTREAM_REPO=https://github.com/datajoint/element-moseq.git HOST_UID=$(id -u) docker compose -f docs/docker-compose.yaml up --build +version: "2.4" +services: + docs: + build: + dockerfile: docs/.docker/Dockerfile + context: ../ + args: + - PACKAGE + image: ${PACKAGE}-docs + environment: + - PACKAGE + - UPSTREAM_REPO + - MODE + - PATCH_VERSION + volumes: + - ../docs:/main/docs + - ../${PACKAGE}:/main/${PACKAGE} + - ../notebooks:/main/notebooks + user: ${HOST_UID}:anaconda + ports: + - 80:80 + command: + - sh + - -c + - | + git config --global --add safe.directory /main + set -e + export ELEMENT_UNDERSCORE=$$(echo $${PACKAGE} | sed 's/element_//g') + export ELEMENT_HYPHEN=$$(echo $${ELEMENT_UNDERSCORE} | sed 's/_/-/g') + export PATCH_VERSION=$$(cat /main/$${PACKAGE}/version.py | grep -oE '\d+\.\d+\.[a-z0-9]+') + + cp /main/notebooks/tutorial.ipynb /main/docs/src/tutorials/ + + if echo "$${MODE}" | grep -i live &>/dev/null; then + mkdocs serve --config-file ./docs/mkdocs.yaml -a 0.0.0.0:80 2>&1 | tee 
docs/temp_mkdocs.log + elif echo "$${MODE}" | grep -iE "qa|push" &>/dev/null; then + echo "INFO::Delete gh-pages branch" + git branch -D gh-pages || true + echo "INFO::Fetch upstream gh-pages" + git fetch $${UPSTREAM_REPO} gh-pages:gh-pages && git switch gh-pages || git switch --orphan gh-pages && git commit --allow-empty -m "init commit" + echo "INFO::mike" + mike deploy --config-file ./docs/mkdocs.yaml -u $$(grep -oE '\d+\.\d+' /main/$${PACKAGE}/version.py) latest + mike set-default --config-file ./docs/mkdocs.yaml latest + if echo "$${MODE}" | grep -i qa &>/dev/null; then + mike serve --config-file ./docs/mkdocs.yaml -a 0.0.0.0:80 + elif echo "$${MODE}" | grep -i push &>/dev/null; then + echo "INFO::Push gh-pages to upstream" + git push $${UPSTREAM_REPO} gh-pages + fi + else + echo "Unexpected mode..." + exit 1 + fi \ No newline at end of file diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml new file mode 100644 index 0000000..f162929 --- /dev/null +++ b/docs/mkdocs.yaml @@ -0,0 +1,178 @@ +# ---------------------- PROJECT SPECIFIC --------------------------- + +site_name: DataJoint Documentation +site_url: http://localhost/docs/elements/element-moseq +repo_url: https://github.com/datajoint/element-moseq +repo_name: datajoint/element-moseq +nav: + - Element MoSeq: index.md + - Data Pipeline: pipeline.md + - Tutorials: + - tutorials/index.md + - Tutorial Notebook: tutorials/tutorial.ipynb + - Concepts: concepts.md + - Key Partnerships: partnerships.md + - Roadmap: roadmap.md + - Citation: citation.md + - API: api/ # defer to gen-files + literate-nav + - Changelog: changelog.md + +# --------------------- NOTES TO CONTRIBUTORS ----------------------- +# Markdown in mkdocs +# 01. Rendering concatenates across single line breaks. This means... +# A. We have to be careful to add extra line breaks around paragraphs, +# including between the end of a pgf and the beginning of bullets. +# B. We can use hard wrapping to make github reviews easier to read. 
+# VSCode Rewrap extension offers a keyboard shortcut for hard wrap +# at the ruler, but don't add breaks in [multiword links](example.com) +# 02. Instead of designating codeblocks with bash, use console. For example.. +# ```console +# cd ../my_dir +# ``` +# 03. Links across docs should ... +# A. Not involve line breaks. +# B. Use relative paths to docs in the same repo +# C. Use lowercase and hyphens not spaces: [sub headings](./doc#sub-heading) +# +# Files +# 01. Add a soft link to your changelog with the following +# ```console +# ln -s ../../CHANGELOG.md ./docs/src/changelog.md +# ``` +# +# Site rendering +# 01. Deploy locally to localhost with the command +# ```console +# MODE="LIVE" PACKAGE=element_{ELEMENT} \ +# UPSTREAM_REPO=https://github.com/datajoint/element-{ELEMENT}.git \ +# HOST_UID=$(id -u) docker compose -f docs/docker-compose.yaml up --build +# ``` +# 02. The API section will pull docstrings. +# A. Follow google styleguide e.g., +# https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html +# With typing suggestions: https://docs.python.org/3/library/typing.html +# B. To pull a specific workflow fork, change ./docs/src/api/make_pages.py#L19 +# 03. To see your fork of the workflow-{element} in this render, change the +# URL in ./docs/src/api/make_pages.py#L19 to your fork. +# 04. To deploy this site on your fork, +# A. declare a branch called gh-pages +# B. go to your fork > settings > pages +# C. direct pages to render from the gh-pages branch at root +# D. 
push a tag to your fork with the format test*.*.* +# +# ---------------------------- STANDARD ----------------------------- +edit_uri: ./edit/main/docs/src +docs_dir: ./src +theme: + font: + text: Roboto Slab + code: Source Code Pro + name: material + custom_dir: src/.overrides + icon: + logo: main/company-logo + favicon: assets/images/company-logo-blue.png + features: + - toc.integrate + - content.code.annotate + palette: + - media: "(prefers-color-scheme: light)" + scheme: datajoint + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + toggle: + icon: material/brightness-4 + name: Switch to light mode +plugins: + - markdownextradata: {} + - search + - mkdocstrings: + default_handler: python + handlers: + python: + options: + members_order: source + group_by_category: false + line_length: 88 + - gen-files: + scripts: + - ./src/api/make_pages.py + - literate-nav: + nav_file: navigation.md + - exclude-search: + exclude: + - "*/navigation.md" + - mkdocs-jupyter: + ignore_h1_titles: True + include: ["*.ipynb"] + - section-index +markdown_extensions: + - attr_list + - md_in_html + - toc: + permalink: true + - pymdownx.emoji: + options: + custom_icons: + - .overrides/.icons + - mdx_truly_sane_lists + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.tabbed: + alternate_style: true + - pymdownx.highlight: + linenums: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.arithmatex: + generic: true + - pymdownx.magiclink # Displays bare URLs as links + - pymdownx.tasklist: # Renders check boxes in tasks lists + custom_checkbox: true +extra: + PATCH_VERSION: !ENV PATCH_VERSION + generator: false # Disable watermark + version: + provider: mike + social: + - icon: main/company-logo + link: https://www.datajoint.com + name: DataJoint + - icon: fontawesome/brands/slack + link: 
https://datajoint.slack.com + name: Slack + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/datajoint + name: LinkedIn + - icon: fontawesome/brands/twitter + link: https://twitter.com/datajoint + name: Twitter + - icon: fontawesome/brands/github + link: https://github.com/datajoint + name: GitHub + - icon: fontawesome/brands/docker + link: https://hub.docker.com/u/datajoint + name: DockerHub + - icon: fontawesome/brands/python + link: https://pypi.org/user/datajointbot + name: PyPI + - icon: fontawesome/brands/stack-overflow + link: https://stackoverflow.com/questions/tagged/datajoint + name: StackOverflow + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/channel/UCdeCuFOTCXlVMRzh6Wk-lGg + name: YouTube +extra_css: + - assets/stylesheets/extra.css + +extra_javascript: + - https://js-na1.hs-scripts.com/23133402.js # HubSpot chatbot + - javascripts/mathjax.js + - https://polyfill.io/v3/polyfill.min.js?features=es6 + - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js diff --git a/docs/src/.overrides/.icons/main/company-logo.svg b/docs/src/.overrides/.icons/main/company-logo.svg new file mode 100644 index 0000000..e876313 --- /dev/null +++ b/docs/src/.overrides/.icons/main/company-logo.svg @@ -0,0 +1,11 @@ + + + + + + + + diff --git a/docs/src/.overrides/.icons/main/project-logo-black.svg b/docs/src/.overrides/.icons/main/project-logo-black.svg new file mode 100644 index 0000000..76bebb1 --- /dev/null +++ b/docs/src/.overrides/.icons/main/project-logo-black.svg @@ -0,0 +1,22 @@ + + + + Asset 3 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/src/.overrides/404.html b/docs/src/.overrides/404.html new file mode 100644 index 0000000..e4c84db --- /dev/null +++ b/docs/src/.overrides/404.html @@ -0,0 +1,19 @@ +{% extends "main.html" %} + + +{% block content %} +

🚧 Not Found 👷

+

+ Unfortunately, we could not find what you were looking for. +
+
+ Usually there are two possibilities for this: +
+

+
+Please make sure you are navigating to the correct address. +

+{% endblock %} diff --git a/docs/src/.overrides/assets/images/company-logo-blue.png b/docs/src/.overrides/assets/images/company-logo-blue.png new file mode 100644 index 0000000..d15194b Binary files /dev/null and b/docs/src/.overrides/assets/images/company-logo-blue.png differ diff --git a/docs/src/.overrides/assets/stylesheets/extra.css b/docs/src/.overrides/assets/stylesheets/extra.css new file mode 100644 index 0000000..13d1c0a --- /dev/null +++ b/docs/src/.overrides/assets/stylesheets/extra.css @@ -0,0 +1,101 @@ +:root { + --dj-primary: #00a0df; + --dj-secondary: #ff5113; + --dj-background: #808285; + --dj-black: #000000; + --dj-white: #ffffff; +} + +/* footer previous/next navigation */ +.md-footer__inner:not([hidden]) { + display: none +} + +.md-typeset figure img { + display: inline; +} + +/* footer social icons */ +html a[title="DataJoint"].md-social__link svg { + color: var(--dj-primary); +} +html a[title="Slack"].md-social__link svg { + color: var(--dj-primary); +} +html a[title="LinkedIn"].md-social__link svg { + color: var(--dj-primary); +} +html a[title="Twitter"].md-social__link svg { + color: var(--dj-primary); +} +html a[title="GitHub"].md-social__link svg { + color: var(--dj-primary); +} +html a[title="DockerHub"].md-social__link svg { + color: var(--dj-primary); +} +html a[title="PyPI"].md-social__link svg { + color: var(--dj-primary); +} +html a[title="StackOverflow"].md-social__link svg { + color: var(--dj-primary); +} +html a[title="YouTube"].md-social__link svg { + color: var(--dj-primary); +} + +[data-md-color-scheme="datajoint"] { + /* ribbon */ + /* ribbon + markdown heading expansion */ + --md-primary-fg-color: var(--dj-black); + /* ribbon text */ + --md-primary-bg-color: var(--dj-primary); + + /* navigation */ + /* navigation header + links */ + --md-typeset-a-color: var(--dj-primary); + /* navigation on hover + diagram outline */ + --md-accent-fg-color: var(--dj-secondary); + + /* main */ + /* main header + already viewed*/ + 
--md-default-fg-color--light: var(--dj-background); + /* primary text */ + --md-typeset-color: var(--dj-black); + /* code comments + diagram text */ + --md-code-fg-color: var(--dj-primary); + + /* footer */ + /* previous/next text */ + /* --md-footer-fg-color: var(--dj-primary); */ +} + +[data-md-color-scheme="slate"] { + /* ribbon */ + /* ribbon + markdown heading expansion */ + --md-primary-fg-color: var(--dj-primary); + /* ribbon text */ + --md-primary-bg-color: var(--dj-white); + + /* navigation */ + /* navigation header + links */ + --md-typeset-a-color: var(--dj-primary); + /* navigation on hover + diagram outline */ + --md-accent-fg-color: var(--dj-secondary); + + /* main */ + /* main header + already viewed*/ + /* --md-default-fg-color--light: var(--dj-background); */ + /* primary text */ + --md-typeset-color: var(--dj-white); + /* code comments + diagram text */ + --md-code-fg-color: var(--dj-primary); + + /* footer */ + /* previous/next text */ + /* --md-footer-fg-color: var(--dj-white); */ +} + +[data-md-color-scheme="slate"] .jupyter-wrapper .Table Td { + color: var(--dj-black) +} \ No newline at end of file diff --git a/docs/src/.overrides/partials/nav.html b/docs/src/.overrides/partials/nav.html new file mode 100644 index 0000000..8b179b4 --- /dev/null +++ b/docs/src/.overrides/partials/nav.html @@ -0,0 +1,33 @@ +{% set class = "md-nav md-nav--primary" %} +{% if "navigation.tabs" in features %} +{% set class = class ~ " md-nav--lifted" %} +{% endif %} +{% if "toc.integrate" in features %} +{% set class = class ~ " md-nav--integrated" %} +{% endif %} + \ No newline at end of file diff --git a/docs/src/api/make_pages.py b/docs/src/api/make_pages.py new file mode 100644 index 0000000..199ee4a --- /dev/null +++ b/docs/src/api/make_pages.py @@ -0,0 +1,27 @@ +"""Generate the api pages and navigation. 
+NOTE: Works best when following the Google style guide +https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html +https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings +""" + +import mkdocs_gen_files +from pathlib import Path +import os + +package = os.getenv("PACKAGE") + +element = package.split("_", 1)[1] + +nav = mkdocs_gen_files.Nav() +for path in sorted(Path(package).glob("**/*.py")): + if path.stem == "__init__" or path.stem == "version": + continue + with mkdocs_gen_files.open(f"api/{path.with_suffix('')}.md", "w") as f: + module_path = ".".join( + [p for p in path.with_suffix("").parts if p != "__init__"] + ) + print(f"::: {module_path}", file=f) + nav[path.parts] = f"{path.with_suffix('')}.md" + +with mkdocs_gen_files.open("api/navigation.md", "w") as nav_file: + nav_file.writelines(nav.build_literate_nav()) diff --git a/docs/src/citation.md b/docs/src/citation.md new file mode 100644 index 0000000..0148b90 --- /dev/null +++ b/docs/src/citation.md @@ -0,0 +1,13 @@ +# Citation + +If your work uses the following resources, please cite the respective manuscript and/or Research Resource Identifier (RRID): + ++ DataJoint Element MoSeq - Version {{ PATCH_VERSION }} + + Yatsenko D, Nguyen T, Shen S, Gunalan K, Turner CA, Guzman R, Sasaki M, Sitonic D, + Reimer J, Walker EY, Tolias AS. DataJoint Elements: Data Workflows for + Neurophysiology. bioRxiv. 2021 Jan 1. 
doi: https://doi.org/10.1101/2021.03.30.437358 + + + [RRID:SCR_021894](https://scicrunch.org/resolver/SCR_021894) + ++ Keypoint-MoSeq + + [Manuscripts](https://www.biorxiv.org/content/10.1101/2023.03.16.532307v2.full.pdf) diff --git a/docs/src/concepts.md b/docs/src/concepts.md new file mode 100644 index 0000000..5d9fc25 --- /dev/null +++ b/docs/src/concepts.md @@ -0,0 +1,28 @@ +# Concepts + +## Keypoint-MoSeq: Advanced Motion Sequencing through Pose Dynamics + +Keypoint-MoSeq[^1] introduces a novel machine learning platform tailored for identifying behavioral modules or "syllables" from keypoint data extracted from conventional video recordings of animal behavior. This innovative approach addresses the challenge posed by continuous keypoint data, prone to high-frequency jitter, often mistaken for transitions between behavioral states by conventional clustering algorithms. To overcome this hurdle, Keypoint-MoSeq leverages a generative model adept at discerning between keypoint noise and genuine behavior, facilitating precise identification of syllables marked by natural sub-second discontinuities inherent in mouse behavior. + +While keypoint tracking methods have significantly advanced the quantification of animal movement kinematics, the task of clustering behavioral data into discrete modules remains complex. Such clustering is vital for creating ethograms that delineate the sequential expression of behavioral modules. Existing methods vary in logic and assumptions, yielding diverse descriptions of identical behavior. Motion Sequencing (MoSeq)[^2] stands out as a validated technique for identifying behavioral modules and their temporal sequences using unsupervised machine learning. However, conventional MoSeq is tailored for depth camera data and faces challenges with high-frequency keypoint jitter. + +To address the limitations of traditional MoSeq when applied to keypoint data, Keypoint-MoSeq emerges as a promising solution. 
This new model enables simultaneous inference of keypoint positions and associated behavioral syllables, facilitating the identification of behavioral structure across diverse experimental settings without necessitating specialized hardware. Keypoint-MoSeq excels over alternative clustering methods in accurately delineating behavioral transitions, capturing neural activity correlations, and identifying complex features of solitary and social behavior. Its flexibility and accessibility, with freely available code for academic use[^3], promise widespread adoption and further innovation in behavioral analysis methods. + +[^1]: Weinreb, C., Pearl, J., Lin, S., Osman, M. A. M., Zhang, L., Annapragada, S., Conlin, E., Hoffman, R., Makowska, S., Gillis, W. F., Jay, M., Ye, S., Mathis, A., Mathis, M. W., Pereira, T., Linderman, S. W., & Datta, S. R. (2023). Keypoint-MoSeq: parsing behavior by linking point tracking to pose dynamics. bioRxiv : the preprint server for biology, 2023.03.16.532307. https://doi.org/10.1101/2023.03.16.532307 + +[^2]: Wiltschko, A. B., Johnson, M. J., Iurilli, G., Peterson, R. E., Katon, J. M., Pashkovski, S. L., ... & Datta, S. R. (2015). Mapping sub-second structure in mouse behavior. Neuron, 88(6), 1121-1135. + +[^3]: www.MoSeq4all.org + +## Element Features + +Through our interviews and direct collaborations, we identified the core motifs to construct Element MoSeq. 
+ +Key features include: +- Ingestion and storage of input video metadata +- Loading and formatting of 2D DeepLabCut keypoint tracking data for model training +- Queue management and initiation of Keypoint-MoSeq analysis across multiple sessions +- Ingestion of analysis outcomes such as PCA, AR-HMM, and Keypoint-SLDS components +- Ingestion of analysis outcomes from motion sequencing inference + + diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 0000000..d576894 --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,43 @@ +# Element MoSeq + +DataJoint Element for Motion Sequencing with +[Keypoint-MoSeq](https://github.com/dattalab/keypoint-moseq){:target="_blank"}, +from keypoint data extracted with [DeepLabCut](https://github.com/DeepLabCut/DeepLabCut){:target="_blank"}. DataJoint Elements collectively standardize and automate +data collection and analysis for neuroscience experiments. Each Element is a modular +pipeline for data storage and processing with corresponding database tables that can be +combined with other Elements to assemble a fully functional pipeline. + +## Experiment Flowchart + +![flowchart](https://raw.githubusercontent.com/datajoint/element-moseq/main/images/flowchart.svg) + +## Data Pipeline Diagram + +![pipeline](https://raw.githubusercontent.com/datajoint/element-moseq/main/images/pipeline.svg) + +## Getting Started + ++ Please fork the [repository](https://github.com/datajoint/element-moseq){:target="_blank"} + ++ Clone the repository to your computer + + ```bash + git clone https://github.com/<your-github-username>/element-moseq + ``` + ++ Install with `pip` + + ```bash + pip install -e . + ``` + ++ [Data Pipeline](./pipeline.md) - Pipeline and table descriptions + ++ [Tutorials](./tutorials/index.md) - Start building your data pipeline + ++ [Code Repository](https://github.com/datajoint/element-moseq/){:target="_blank"} + +## Support + ++ If you need help getting started or run into any errors, please contact our team by +email at support@datajoint.com. 
diff --git a/docs/src/partnerships.md b/docs/src/partnerships.md new file mode 100644 index 0000000..e6c4606 --- /dev/null +++ b/docs/src/partnerships.md @@ -0,0 +1,3 @@ +# Key partnerships + +Element MoSeq was developed in collaboration with the [Keypoint-MoSeq developers](https://github.com/dattalab/keypoint-moseq) in Datta's Lab at Harvard Medical School to promote integration and interoperability between Keypoint-MoSeq and the DataJoint Element MoSeq. diff --git a/docs/src/pipeline.md b/docs/src/pipeline.md new file mode 100644 index 0000000..23a57c6 --- /dev/null +++ b/docs/src/pipeline.md @@ -0,0 +1,84 @@ +# Data Pipeline + +Each node in the following diagram represents the analysis code in the pipeline and the +corresponding table in the database. Within the pipeline, Element MoSeq +connects to upstream Elements including Lab, Animal, Session, and Event. For more +detailed documentation on each table, see the API docs for the respective schemas. + +The Element is composed of two main schemas, `kpms_pca` and `kpms_model`. The `kpms_pca` schema is designed to handle the analysis and ingestion of a PCA model for formatted keypoint tracking. The `kpms_model` schema is designed to handle the analysis and ingestion of Keypoint-MoSeq's motion sequencing on video recordings. + +## Diagrams + +### `kpms_pca` module + +- The `kpms_pca` schema is designed to handle the analysis and ingestion of a PCA model for formatted keypoint tracking. + + ![pipeline](https://raw.githubusercontent.com/datajoint/element-moseq/main/images/pipeline_kpms_pca.svg) + +### `kpms_model` module + +- The `kpms_model` schema is designed to handle the analysis and ingestion of Keypoint-MoSeq's motion sequencing on video recordings. 
+ + ![pipeline](https://raw.githubusercontent.com/datajoint/element-moseq/main/images/pipeline_kpms_model.svg) + +## Table Descriptions + +### `lab` schema + +- For further details see the [lab schema API docs](https://datajoint.com/docs/elements/element-lab/latest/api/element_lab/lab/) + +| Table | Description | +| --- | --- | +| Device | Scanner metadata | + +### `subject` schema + +- Although not required, most choose to connect the `Session` table to a `Subject` table. + +- For further details see the [subject schema API docs](https://datajoint.com/docs/elements/element-animal/latest/api/element_animal/subject/) + +| Table | Description | +| --- | --- | +| Subject | Basic information of the research subject | + +### `session` schema + +- For further details see the [session schema API docs](https://datajoint.com/docs/elements/element-session/latest/api/element_session/session_with_datetime/) + +| Table | Description | +| --- | --- | +| Session | Unique experimental session identifier | + +### `kpms_pca` schema + +- For further details see the [kpms_pca schema API docs](https://datajoint.com/docs/elements/element-moseq/latest/api/element_moseq/kpms_pca/) + +| Table | Description | +| --- | --- | +| PoseEstimationMethod | Table to store the pose estimation methods supported by the keypoint loader of `keypoint-moseq` package. | +| KeypointSet | Table to store the keypoint data and video set directory to train the model.| +| KeypointSet.VideoFile | IDs and file paths of each video file that will be used to train the model.| +| Bodyparts | Table to store the body parts to use in the analysis.| +| PCATask | Staging table to define the PCA task and its output directory. | +| LoadKeypointSet | Table to create the `kpms_project_output_dir`, and create and update the `config.yml` by creating a new `dj_config.yml`. 
| +| PCAFitting | Automated fitting of the PCA model.| +| LatentDimension | Automated computation to calculate the latent dimension as one of the autoregressive hyperparameters (`ar_hypparams`) necessary for the model fitting. | + + +### `kpms_model` schema + +- For further details see the [kpms_model schema API docs](https://datajoint.com/docs/elements/element-moseq/latest/api/element_moseq/kpms_model/) + +| Table | Description | +| --- | --- | +| PreFittingTask | Table to specify the parameters for the pre-fitting (AR-HMM) of the model. | +| PreFitting | Automated computation to fit an AR-HMM model. | +| FullFittingTask | Table to specify the parameters for the full fitting of the model. The full model will generally require a lower value of kappa to yield the same target syllable durations. | +| FullFitting | Automated computation to fit the full model. | +| Model | Table to register the models. | +| VideoRecording | Set of video recordings for the Keypoint-MoSeq inference. | +| VideoRecording.File | File IDs and paths associated with a given `recording_id`. | +| InferenceTask | Table to specify the model, the video set, and the output directory for the inference task. | +| Inference | This table is used to infer the model results from the checkpoint file and save them to `{output_dir}/{model_name}/{inference_output_dir}/results.h5`. | +| Inference.MotionSequence | This table is used to store the results of the model inference.| +| Inference.GridMoviesSampledInstances | This table is used to store the grid movies sampled instances.| \ No newline at end of file diff --git a/docs/src/roadmap.md b/docs/src/roadmap.md new file mode 100644 index 0000000..364d017 --- /dev/null +++ b/docs/src/roadmap.md @@ -0,0 +1,3 @@ +# Roadmap + +Further development of this Element is community driven. Upon user requests and based on guidance from the Scientific Steering Group we will continue adding features to this Element. 
diff --git a/docs/src/tutorials/index.md b/docs/src/tutorials/index.md new file mode 100644 index 0000000..ed2e647 --- /dev/null +++ b/docs/src/tutorials/index.md @@ -0,0 +1,18 @@ +# Tutorials + ++ Element MoSeq includes an [interactive tutorial on GitHub Codespaces](https://github.com/datajoint/element-moseq#interactive-tutorial), which is configured for users to run the pipeline. + ++ DataJoint Elements are modular and can be connected into a complete pipeline. The interactive tutorial includes an example Jupyter notebook that combines five DataJoint Elements - Lab, Animal, Session, Event, and MoSeq. The notebook describes the pipeline and provides instructions for running the pipeline. For convenience, this notebook is also rendered on this website: + + [Tutorial notebook](tutorial.ipynb) + +## Installation Instructions for Active Projects + ++ The Element MoSeq described above can be modified for a user's specific experimental requirements and thereby used in active projects. + ++ The GitHub Codespace and Dev Container are configured for tutorials and prototyping. +We recommend that users configure a database specifically for production pipelines. Instructions for a local installation of the integrated development environment with a database can be found on the [User Guide](https://datajoint.com/docs/elements/user-guide/) page. + + +## Pose Estimation Method + ++ At present, behavioral segmentation analysis is compatible with keypoint data extracted with DeepLabCut with single-animal datasets. 
\ No newline at end of file diff --git a/element_moseq/__init__.py b/element_moseq/__init__.py new file mode 100644 index 0000000..7dbc508 --- /dev/null +++ b/element_moseq/__init__.py @@ -0,0 +1,21 @@ +import os +import datajoint as dj + +if "custom" not in dj.config: + dj.config["custom"] = {} + +# overwrite dj.config['custom'] values with environment variables if available + +dj.config["custom"]["database.prefix"] = os.getenv( + "DATABASE_PREFIX", dj.config["custom"].get("database.prefix", "") +) + +dj.config["custom"]["kpms_root_data_dir"] = os.getenv( + "KPMS_ROOT_DATA_DIR", dj.config["custom"].get("kpms_root_data_dir", "") +) + +dj.config["custom"]["kpms_processed_data_dir"] = os.getenv( + "KPMS_PROCESSED_DATA_DIR", dj.config["custom"].get("kpms_processed_data_dir", "") +) + +db_prefix = dj.config["custom"].get("database.prefix", "") diff --git a/element_moseq/kpms_model.py b/element_moseq/kpms_model.py new file mode 100644 index 0000000..317394d --- /dev/null +++ b/element_moseq/kpms_model.py @@ -0,0 +1,660 @@ +from datetime import datetime +import inspect +import os +from pathlib import Path +from typing import Optional + +from matplotlib import pyplot as plt + +import datajoint as dj +import importlib +from datajoint import DataJointError + +from element_moseq.kpms_pca import * + +from element_interface.utils import find_full_path +from .readers.kpms_reader import load_kpms_dj_config, generate_kpms_dj_config +from keypoint_moseq import update_hypparams, fit_model, load_checkpoint + + +schema = dj.schema() +_linking_module = None + + +def activate( + model_schema_name: str, + *, + create_schema: bool = True, + create_tables: bool = True, + linking_module: str = None, +): + """Activate this schema. + + Args: + model_schema_name (str): schema name on the database server + create_schema (bool): when True (default), create schema in the database if it + does not yet exist. 
+ create_tables (bool): when True (default), create schema tables in the database + if they do not yet exist. + linking_module (str): a module (or name) containing the required dependencies. + + Dependencies: + Functions: + get_kpms_root_data_dir(): Returns absolute path for root data director(y/ies) + with all behavioral recordings, as (list of) string(s). + get_kpms_processed_data_dir(): Optional. Returns absolute path for processed + data. Defaults to session video subfolder. + """ + + if isinstance(linking_module, str): + linking_module = importlib.import_module(linking_module) + assert inspect.ismodule( + linking_module + ), "The argument 'dependency' must be a module's name or a module" + assert hasattr( + linking_module, "get_kpms_root_data_dir" + ), "The linking module must specify a lookup function for a root data directory" + + global _linking_module + _linking_module = linking_module + + # activate + schema.activate( + model_schema_name, + create_schema=create_schema, + create_tables=create_tables, + add_objects=_linking_module.__dict__, + ) + + +# -------------- Functions required by element-moseq --------------- + + +def get_kpms_root_data_dir() -> list: + """Pulls relevant func from parent namespace to specify root data dir(s). + + It is recommended that all paths in DataJoint Elements stored as relative + paths, with respect to some user-configured "root" director(y/ies). The + root(s) may vary between data modalities and user machines. Returns a full path + string or list of strings for possible root data directories. 
+ """ + root_directories = _linking_module.get_kpms_root_data_dir() + if isinstance(root_directories, (str, Path)): + root_directories = [root_directories] + + if ( + hasattr(_linking_module, "get_kpms_processed_data_dir") + and get_kpms_processed_data_dir() not in root_directories + ): + root_directories.append(_linking_module.get_kpms_processed_data_dir()) + + return root_directories + + +def get_kpms_processed_data_dir() -> Optional[str]: + """Pulls relevant func from parent namespace. Defaults to KPMS's project /videos/. + + Method in parent namespace should provide a string to a directory where KPMS output + files will be stored. If unspecified, output files will be stored in the + session directory 'videos' folder, per DeepLabCut default. + """ + if hasattr(_linking_module, "get_kpms_processed_data_dir"): + return _linking_module.get_kpms_processed_data_dir() + else: + return None + + +# ----------------------------- Table declarations ---------------------- + + +@schema +class PreFittingTask(dj.Manual): + """Table to specify the parameters for the pre-fitting (AR-HMM) of the model. + + Attributes: + kpms_pca.PCAFitting (foreign key) : PCA fitting task. + pre_latent_dim (int) : Number of latent dimensions to use for the model pre-fitting. + pre_kappa (int) : Kappa value to use for the model pre-fitting. + pre_num_iterations (int) : Number of Gibbs sampling iterations to run in the model pre-fitting. + pre_fitting_desc(varchar) : User-defined description of the pre-fitting task. + """ + + definition = """ + -> kpms_pca.PCAFitting # PCAFitting Key + pre_latent_dim : int # Number of latent dimensions to use for the model pre-fitting + pre_kappa : int # Kappa value to use for the model pre-fitting + pre_num_iterations : int # Number of Gibbs sampling iterations to run in the model pre-fitting. 
@schema
class PreFittingTask(dj.Manual):
    """Table to specify the parameters for the pre-fitting (AR-HMM) of the model.

    Attributes:
        kpms_pca.PCAFitting (foreign key) : PCA fitting task.
        pre_latent_dim (int)              : Number of latent dimensions to use for the model pre-fitting.
        pre_kappa (int)                   : Kappa value to use for the model pre-fitting.
        pre_num_iterations (int)          : Number of Gibbs sampling iterations to run in the model pre-fitting.
        pre_fitting_desc (varchar)        : User-defined description of the pre-fitting task.
    """

    definition = """
    -> kpms_pca.PCAFitting # PCAFitting Key
    pre_latent_dim : int # Number of latent dimensions to use for the model pre-fitting
    pre_kappa : int # Kappa value to use for the model pre-fitting
    pre_num_iterations : int # Number of Gibbs sampling iterations to run in the model pre-fitting.
    ---
    pre_fitting_desc='' : varchar(1000) # User-defined description of the pre-fitting task
    """


@schema
class PreFitting(dj.Computed):
    """Automated computation to fit an AR-HMM model.

    Attributes:
        PreFittingTask (foreign key) : PreFittingTask Key.
        model_name (varchar)         : Name of the model as "kpms_project_output_dir/model_name".
        pre_fitting_duration (time)  : Time duration of the model fitting computation.
    """

    definition = """
    -> PreFittingTask # PreFittingTask Key
    ---
    model_name='' : varchar(100) # Name of the model as "kpms_project_output_dir/model_name"
    pre_fitting_duration=NULL : time # Time duration of the model fitting computation
    """

    def make(self, key):
        """Fit the AR-HMM model and register the resulting model name and duration.

        Args:
            key (dict): Dictionary with the `PreFittingTask` key.

        High-level Logic:
            1. Fetch `kpms_project_output_dir` from `PCATask` and the fitting
               parameters from `PreFittingTask`.
            2. Load the `kpms_dj_config`, update it with the selected latent
               dimension and kappa, and write it back.
            3. Load the PCA model.
            4. Fetch `coordinates` and `confidences` from `LoadKeypointSet` and
               format them into `data` and `metadata`.
            5. Initialize the model with `init_model`.
            6. Update the model hyperparameters with the selected kappa and
               latent dimension.
            7. Fit the model with `ar_only=True` for `pre_num_iterations`
               iterations.
            8. Insert the model name (relative to the processed data directory)
               and the fitting duration into this table.
        """
        # Local imports: `time` is only needed here, and keypoint_moseq is
        # imported lazily as in the rest of this module.
        import time

        from keypoint_moseq import format_data, init_model, load_pca, update_hypparams

        # `get_kpms_processed_data_dir()` is annotated as returning a string;
        # wrapping it in `Path` makes the `/` join work for both str and Path.
        processed_dir = Path(get_kpms_processed_data_dir())
        kpms_project_output_dir = processed_dir / (PCATask & key).fetch1(
            "kpms_project_output_dir"
        )
        project_dir = kpms_project_output_dir.as_posix()

        pre_latent_dim, pre_kappa, pre_num_iterations = (PreFittingTask & key).fetch1(
            "pre_latent_dim", "pre_kappa", "pre_num_iterations"
        )

        kpms_dj_config = load_kpms_dj_config(
            project_dir, check_if_valid=True, build_indexes=True
        )
        kpms_dj_config.update(
            dict(latent_dim=int(pre_latent_dim), kappa=int(pre_kappa))
        )
        generate_kpms_dj_config(project_dir, **kpms_dj_config)

        pca = load_pca(project_dir)

        coordinates, confidences = (LoadKeypointSet & key).fetch1(
            "coordinates", "confidences"
        )
        data, metadata = format_data(coordinates, confidences, **kpms_dj_config)

        model = init_model(data=data, metadata=metadata, pca=pca, **kpms_dj_config)
        model = update_hypparams(
            model, kappa=int(pre_kappa), latent_dim=int(pre_latent_dim)
        )

        # A monotonic clock is immune to DST switches and system clock
        # adjustments that can distort (or even negate) a wall-clock difference
        # over a long-running fit.
        start = time.monotonic()
        model, model_name = fit_model(
            model=model,
            data=data,
            metadata=metadata,
            project_dir=project_dir,
            ar_only=True,
            num_iters=pre_num_iterations,
        )
        elapsed_seconds = int(time.monotonic() - start)

        hours, remainder = divmod(elapsed_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        duration_formatted = "{:02}:{:02}:{:02}".format(hours, minutes, seconds)

        self.insert1(
            {
                **key,
                "model_name": (
                    kpms_project_output_dir.relative_to(processed_dir) / model_name
                ).as_posix(),
                "pre_fitting_duration": duration_formatted,
            }
        )
@schema
class FullFittingTask(dj.Manual):
    """Table to specify the parameters for the full fitting of the model.

    The full model will generally require a lower value of kappa to yield the
    same target syllable durations.

    Attributes:
        kpms_pca.PCAFitting (foreign key) : PCAFitting Key.
        full_latent_dim (int)             : Number of latent dimensions to use for the model full fitting.
        full_kappa (int)                  : Kappa value to use for the model full fitting.
        full_num_iterations (int)         : Number of Gibbs sampling iterations to run in the model full fitting.
        full_fitting_desc (varchar)       : User-defined description of the model full fitting task.
    """

    definition = """
    -> kpms_pca.PCAFitting # PCAFitting Key
    full_latent_dim : int # Number of latent dimensions to use for the model full fitting
    full_kappa : int # Kappa value to use for the model full fitting
    full_num_iterations : int # Number of Gibbs sampling iterations to run in the model full fitting.
    ---
    full_fitting_desc='' : varchar(1000) # User-defined description of the model full fitting task
    """


@schema
class FullFitting(dj.Computed):
    """Automated computation to fit the full model.

    Attributes:
        FullFittingTask (foreign key) : FullFittingTask Key.
        model_name (varchar)          : Name of the full-fitted model (output_dir/model_name).
        full_fitting_duration (time)  : Time duration of the full fitting model.
    """

    definition = """
    -> FullFittingTask # FullFittingTask Key
    ---
    model_name : varchar(100) # Name of the full-fitted model (output_dir/model_name)
    full_fitting_duration=NULL : time # Time duration of the full fitting model
    """

    def make(self, key):
        """Fit the full model and register the resulting model name and duration.

        Args:
            key (dict): Dictionary with the `FullFittingTask` key.

        High-level Logic:
            1. Fetch `kpms_project_output_dir` from `PCATask` and the fitting
               parameters from `FullFittingTask`.
            2. Load the `kpms_dj_config`, update it with the selected latent
               dimension and kappa, and write it back.
            3. Load the PCA model and fetch `coordinates` and `confidences` from
               `LoadKeypointSet` to format the data.
            4. Initialize the model with `init_model`.
            5. Update the model hyperparameters with the selected kappa and
               latent dimension.
            6. Fit the model with `ar_only=False` for `full_num_iterations`
               iterations.
            7. Call `reindex_syllables_in_checkpoint` on the fitted model's
               checkpoint.
            8. Insert the model name (relative to the processed data directory)
               and the fitting duration into the `FullFitting` table.
        """
        import time

        # `update_hypparams` is imported here as well: it is called below, and
        # `PreFitting.make` also imports it locally rather than relying on a
        # module-level import.
        from keypoint_moseq import (
            format_data,
            init_model,
            load_pca,
            reindex_syllables_in_checkpoint,
            update_hypparams,
        )

        # `get_kpms_processed_data_dir()` is annotated as returning a string;
        # wrapping it in `Path` makes the `/` join work for both str and Path.
        processed_dir = Path(get_kpms_processed_data_dir())
        kpms_project_output_dir = processed_dir / (PCATask & key).fetch1(
            "kpms_project_output_dir"
        )
        project_dir = kpms_project_output_dir.as_posix()

        full_latent_dim, full_kappa, full_num_iterations = (
            FullFittingTask & key
        ).fetch1("full_latent_dim", "full_kappa", "full_num_iterations")

        kpms_dj_config = load_kpms_dj_config(
            project_dir, check_if_valid=True, build_indexes=True
        )
        kpms_dj_config.update(
            dict(latent_dim=int(full_latent_dim), kappa=int(full_kappa))
        )
        generate_kpms_dj_config(project_dir, **kpms_dj_config)

        pca = load_pca(project_dir)
        coordinates, confidences = (LoadKeypointSet & key).fetch1(
            "coordinates", "confidences"
        )
        data, metadata = format_data(coordinates, confidences, **kpms_dj_config)
        model = init_model(data=data, metadata=metadata, pca=pca, **kpms_dj_config)
        model = update_hypparams(
            model, kappa=int(full_kappa), latent_dim=int(full_latent_dim)
        )

        # Monotonic clock: unaffected by system clock adjustments during the fit.
        start = time.monotonic()
        model, model_name = fit_model(
            model=model,
            data=data,
            metadata=metadata,
            project_dir=project_dir,
            ar_only=False,
            num_iters=full_num_iterations,
        )
        elapsed_seconds = int(time.monotonic() - start)

        hours, remainder = divmod(elapsed_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        duration_formatted = "{:02}:{:02}:{:02}".format(hours, minutes, seconds)

        # Only the last path component of `model_name` is passed as the model name.
        reindex_syllables_in_checkpoint(project_dir, Path(model_name).parts[-1])

        self.insert1(
            {
                **key,
                "model_name": (
                    kpms_project_output_dir.relative_to(processed_dir) / model_name
                ).as_posix(),
                "full_fitting_duration": duration_formatted,
            }
        )


@schema
class Model(dj.Manual):
    """Table to register the models.

    Attributes:
        model_name (varchar) : Generated model name (output_dir/model_name).
        latent_dim (int)     : Number of latent dimensions of the model.
        kappa (int)          : Kappa value of the model.
    """

    definition = """
    model_name : varchar(64) # Generated model name (output_dir/model_name)
    ---
    latent_dim : int # Number of latent dimensions of the model
    kappa : int # Kappa value of the model
    """


@schema
class VideoRecording(dj.Manual):
    """Set of video recordings for the Keypoint-MoSeq inference.

    Attributes:
        Session (foreign key)              : Session primary key.
        PoseEstimationMethod (foreign key) : Pose estimation method.
        recording_id (int)                 : Unique ID for each recording.
    """

    definition = """
    -> Session # Session primary key
    -> kpms_pca.PoseEstimationMethod # Pose estimation method
    recording_id: int # Unique ID for each recording
    """

    class File(dj.Part):
        """File IDs and paths associated with a given `recording_id`.

        Attributes:
            VideoRecording (foreign key) : Video recording primary key.
            file_id (int)                : Unique ID for each file.
            file_path (varchar)          : Filepath of each video, relative to root data directory.
        """

        definition = """
        -> master
        file_id: int # Unique ID for each file
        ---
        file_path: varchar(1000) # Filepath of each video, relative to root data directory.
        """
@schema
class InferenceTask(dj.Manual):
    """Table to specify the model, the video set, and the output directory for the inference task.

    Attributes:
        VideoRecording (foreign key)   : Video recording primary key.
        Model (foreign key)            : Model primary key.
        inference_output_dir (varchar) : Sub-directory where the results will be stored.
        inference_desc (varchar)       : User-defined description of the inference task.
        num_iterations (int)           : Number of iterations to use for the model inference. If null, the default number internally is 50.
    """

    definition = """
    -> VideoRecording # Video recording primary key
    -> Model # Model primary key
    ---
    inference_output_dir='' : varchar(1000) # Optional. Sub-directory where the results will be stored
    inference_desc='' : varchar(1000) # Optional. User-defined description of the inference task
    num_iterations=NULL : int # Optional. Number of iterations to use for the model inference. If null, the default number internally is 50.
    """


@schema
class Inference(dj.Computed):
    """Infer the model results from the checkpoint file and save them to `{output_dir}/{model_name}/{inference_output_dir}/results.h5`.

    Attributes:
        InferenceTask (foreign key) : InferenceTask primary key.
        inference_duration (time)   : Time duration of the inference computation.
    """

    definition = """
    -> InferenceTask # InferenceTask primary key
    ---
    inference_duration=NULL : time # Time duration of the inference computation
    """

    class MotionSequence(dj.Part):
        """Results of the model inference, one row per video.

        Attributes:
            video_name (varchar)    : Name of the video.
            syllable (longblob)     : Syllable labels (z). The syllable label assigned to each frame (i.e. the state indexes assigned by the model).
            latent_state (longblob) : Inferred low-dim pose state (x). Low-dimensional representation of the animal's pose in each frame.
            centroid (longblob)     : Inferred centroid (v). The centroid of the animal in each frame, as estimated by the model.
            heading (longblob)      : Inferred heading (h). The heading of the animal in each frame, as estimated by the model.
        """

        definition = """
        -> master
        video_name : varchar(150) # Name of the video
        ---
        syllable : longblob # Syllable labels (z). The syllable label assigned to each frame (i.e. the state indexes assigned by the model).
        latent_state : longblob # Inferred low-dim pose state (x). Low-dimensional representation of the animal's pose in each frame. These are similar to PCA scores, are modified to reflect the pose dynamics and noise estimates inferred by the model.
        centroid : longblob # Inferred centroid (v). The centroid of the animal in each frame, as estimated by the model.
        heading : longblob # Inferred heading (h). The heading of the animal in each frame, as estimated by the model.
        """

    class GridMoviesSampledInstances(dj.Part):
        """Sampled instances shown in the grid movies, one row per syllable.

        Attributes:
            syllable (int)       : Syllable label.
            instances (longblob) : List of instances shown in each grid movie (in row-major order), where each instance is specified as a tuple with the video name, start frame and end frame.
        """

        definition = """
        -> master
        syllable: int # Syllable label
        ---
        instances: longblob # List of instances shown in each in grid movie (in row-major order), where each instance is specified as a tuple with the video name, start frame and end frame.
        """

    def make(self, key):
        """Apply a fitted model to new keypoint data and store the results.

        Results are written to
        `{output_dir}/{model_name}/{inference_output_dir}/results.h5`.

        Args:
            key (dict): Primary key from the InferenceTask table.

        Raises:
            NotImplementedError: If the format method is not `deeplabcut`.

        High-level Logic:
            1. Fetch `inference_output_dir`, `model_name` and `num_iterations`
               from the `InferenceTask` table and the format method from
               `VideoRecording`; reject unsupported format methods.
            2. Create the inference output directory if it does not exist.
            3. Load the PCA model and the model checkpoint.
            4. Build the `filepath_patterns` of the new keypoint data, load the
               keypoints and format the data.
            5. Apply the model; `num_iters` is passed only when
               `num_iterations` is set, otherwise `apply_model` uses its own
               default.
            6. Save the results as CSV and store the syllable-frequency
               histogram.
            7. Generate the trajectory plots, grid movies and similarity
               dendrogram.
            8. Insert the inference duration in the `Inference` table.
            9. Insert the results in the `MotionSequence` and
               `GridMoviesSampledInstances` tables.
        """
        import time

        from keypoint_moseq import (
            load_pca,
            load_keypoints,
            format_data,
            apply_model,
            save_results_as_csv,
            plot_syllable_frequencies,
            generate_trajectory_plots,
            generate_grid_movies,
            plot_similarity_dendrogram,
        )

        inference_output_dir, model_name, num_iterations = (InferenceTask & key).fetch1(
            "inference_output_dir", "model_name", "num_iterations"
        )
        format_method = (VideoRecording & key).fetch1("format_method")
        # Fail fast: reject unsupported formats before creating directories or
        # loading the PCA model and checkpoint.
        if format_method != "deeplabcut":
            raise NotImplementedError(
                "The currently supported format method is `deeplabcut`. "
                "If you require support for another format method, please "
                "reach out to us at `support@datajoint.com`."
            )
        file_paths = (VideoRecording.File & key).fetch("file_path")

        # `get_kpms_processed_data_dir()` is annotated as returning a string;
        # wrapping it in `Path` makes the `/` joins work for both str and Path.
        model_full_path = Path(get_kpms_processed_data_dir()) / model_name
        project_dir = model_full_path.parent.as_posix()
        checkpoint_name = model_full_path.name

        inference_output_full_dir = model_full_path / inference_output_dir
        # `exist_ok=True` avoids the check-then-create race of exists()+makedirs().
        inference_output_full_dir.mkdir(parents=True, exist_ok=True)

        pca = load_pca(project_dir)
        model = load_checkpoint(
            project_dir=model_full_path.parent, model_name=checkpoint_name
        )[0]

        # One glob pattern per video: "<video directory>/<video file stem>*".
        root_dirs = get_kpms_root_data_dir()
        filepath_patterns = [
            (
                Path(find_full_path(root_dirs, path)).parent
                / (os.path.splitext(os.path.basename(path))[0] + "*")
            ).as_posix()
            for path in file_paths
        ]

        kpms_dj_config = load_kpms_dj_config(
            project_dir, check_if_valid=True, build_indexes=True
        )

        coordinates, confidences, _ = load_keypoints(
            filepath_pattern=filepath_patterns, format=format_method
        )
        data, metadata = format_data(coordinates, confidences, **kpms_dj_config)

        # Pass `num_iters` only when the task specifies it, so `apply_model`
        # falls back to its own default otherwise.
        iteration_kwargs = {"num_iters": num_iterations} if num_iterations else {}

        # Monotonic clock: unaffected by system clock adjustments during the run.
        start = time.monotonic()
        results = apply_model(
            model=model,
            data=data,
            metadata=metadata,
            pca=pca,
            project_dir=project_dir,
            model_name=checkpoint_name,
            results_path=(inference_output_full_dir / "results.h5").as_posix(),
            return_model=False,
            **iteration_kwargs,
            **kpms_dj_config,
        )
        elapsed_seconds = int(time.monotonic() - start)

        hours, remainder = divmod(elapsed_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        duration_formatted = "{:02}:{:02}:{:02}".format(hours, minutes, seconds)

        save_results_as_csv(
            results=results,
            project_dir=project_dir,
            model_name=checkpoint_name,
            save_dir=(inference_output_full_dir / "results_as_csv").as_posix(),
        )

        fig, _ = plot_syllable_frequencies(
            results=results, path=inference_output_full_dir.as_posix()
        )
        fig.savefig(inference_output_full_dir / "syllable_frequencies.png")
        plt.close(fig)

        generate_trajectory_plots(
            coordinates=coordinates,
            results=results,
            project_dir=project_dir,
            model_name=checkpoint_name,
            output_dir=(inference_output_full_dir / "trajectory_plots").as_posix(),
            **kpms_dj_config,
        )

        sampled_instances = generate_grid_movies(
            coordinates=coordinates,
            results=results,
            project_dir=project_dir,
            model_name=checkpoint_name,
            output_dir=(inference_output_full_dir / "grid_movies").as_posix(),
            **kpms_dj_config,
        )

        plot_similarity_dendrogram(
            coordinates=coordinates,
            results=results,
            project_dir=project_dir,
            model_name=checkpoint_name,
            # The file name spelling is kept as-is so existing outputs stay findable.
            save_path=(inference_output_full_dir / "similarity_dendogram").as_posix(),
            **kpms_dj_config,
        )

        # The master row must exist before its part-table rows are inserted.
        self.insert1({**key, "inference_duration": duration_formatted})

        for video_name, video_results in results.items():
            self.MotionSequence.insert1(
                {
                    **key,
                    "video_name": video_name,
                    "syllable": video_results["syllable"],
                    "latent_state": video_results["latent_state"],
                    "centroid": video_results["centroid"],
                    "heading": video_results["heading"],
                }
            )

        for syllable, instances in sampled_instances.items():
            self.GridMoviesSampledInstances.insert1(
                {**key, "syllable": syllable, "instances": instances}
            )
"grid_movies").as_posix(), + **kpms_dj_config, + ) + + plot_similarity_dendrogram( + coordinates=coordinates, + results=results, + project_dir=model_full_path.parent.as_posix(), + model_name=Path(model_name).parts[-1], + save_path=(inference_output_full_dir / "similarity_dendogram").as_posix(), + **kpms_dj_config, + ) + + self.insert1({**key, "inference_duration": duration_formatted}) + + for results_idx in results.keys(): + self.MotionSequence.insert1( + { + **key, + "video_name": results_idx, + "syllable": results[results_idx]["syllable"], + "latent_state": results[results_idx]["latent_state"], + "centroid": results[results_idx]["centroid"], + "heading": results[results_idx]["heading"], + } + ) + + for syllable in sampled_instances.keys(): + self.GridMoviesSampledInstances.insert1( + {**key, "syllable": syllable, "instances": sampled_instances[syllable]} + ) diff --git a/element_moseq/kpms_pca.py b/element_moseq/kpms_pca.py new file mode 100644 index 0000000..fa14d5f --- /dev/null +++ b/element_moseq/kpms_pca.py @@ -0,0 +1,459 @@ +from datetime import datetime, timezone +import inspect +import os +from pathlib import Path +from typing import Optional + +import cv2 +import numpy as np + +import datajoint as dj +import importlib + +from element_interface.utils import find_full_path +from .readers.kpms_reader import generate_kpms_dj_config, load_kpms_dj_config + + +schema = dj.schema() +_linking_module = None + + +def activate( + pca_schema_name: str, + *, + create_schema: bool = True, + create_tables: bool = True, + linking_module: str = None, +): + """Activate this schema. + + Args: + pca_schema_name (str): A string containing the name of the pca schema. + create_schema (bool): If True (default), schema will be created in the database. + create_tables (bool): If True (default), tables related to the schema will be created in the database. 
def activate(
    pca_schema_name: str,
    *,
    create_schema: bool = True,
    create_tables: bool = True,
    linking_module: str = None,
):
    """Activate this schema.

    Args:
        pca_schema_name (str): A string containing the name of the pca schema.
        create_schema (bool): If True (default), schema will be created in the database.
        create_tables (bool): If True (default), tables related to the schema will be created in the database.
        linking_module (str): A module, or the importable name of a module, containing the
            required dependencies to activate the schema.

    Dependencies:
        Functions:
            get_kpms_root_data_dir(): Returns absolute path for root data director(y/ies)
                with all behavioral recordings, as (list of) string(s).
            get_kpms_processed_data_dir(): Optional. Returns absolute path for processed data.

    Raises:
        AssertionError: If `linking_module` is not a module (or module name), or if it
            does not define `get_kpms_root_data_dir`.
    """
    if isinstance(linking_module, str):
        linking_module = importlib.import_module(linking_module)

    # The message names the actual keyword argument so the caller can find it.
    assert inspect.ismodule(
        linking_module
    ), "The argument 'linking_module' must be a module's name or a module"
    assert hasattr(
        linking_module, "get_kpms_root_data_dir"
    ), "The linking module must specify a lookup function for a root data directory"

    global _linking_module
    _linking_module = linking_module

    # Everything defined in the linking module is made available to the table
    # definitions through `add_objects`.
    schema.activate(
        pca_schema_name,
        create_schema=create_schema,
        create_tables=create_tables,
        add_objects=_linking_module.__dict__,
    )


# -------------- Functions required by the element-moseq ---------------


def get_kpms_root_data_dir() -> list:
    """Return the root data director(y/ies) provided by the linking module.

    It is recommended that all paths in DataJoint Elements are stored as relative
    paths, with respect to some user-configured "root" director(y/ies). The
    root(s) may vary between data modalities and user machines.

    Returns:
        list: A new list with the root director(y/ies) returned by
            `_linking_module.get_kpms_root_data_dir()`. When the linking module
            also provides a processed data directory that is not already listed,
            it is appended as the last element.
    """
    root_directories = _linking_module.get_kpms_root_data_dir()
    if isinstance(root_directories, (str, Path)):
        root_directories = [root_directories]
    else:
        # Copy before appending: the linking module may hand back the same list
        # object on every call, and appending to it in place would silently
        # modify the user's configuration.
        root_directories = list(root_directories)

    processed_dir = get_kpms_processed_data_dir()
    # `None` means "no processed directory configured"; never expose it as a root.
    if processed_dir is not None and processed_dir not in root_directories:
        root_directories.append(processed_dir)

    return root_directories


def get_kpms_processed_data_dir() -> Optional[str]:
    """Return the processed data directory provided by the linking module.

    The linking module may optionally define `get_kpms_processed_data_dir()`,
    which should return the directory where KPMS output files will be stored.

    Returns:
        The value returned by the linking module's function, or `None` when the
        linking module does not define `get_kpms_processed_data_dir`.
    """
    getter = getattr(_linking_module, "get_kpms_processed_data_dir", None)
    if getter is None:
        return None
    return getter()
# ----------------------------- Table declarations ----------------------


@schema
class PoseEstimationMethod(dj.Lookup):
    """Lookup table of pose estimation methods and the file formats associated with each.

    The rows are pre-populated from `contents` below.

    Attributes:
        format_method (str): Pose estimation method (e.g. deeplabcut, sleap, etc.)
        pose_estimation_desc (str): Pose estimation method description with the supported formats.
    """

    definition = """
    # Parameters used to obtain the keypoints data based on a specific pose estimation method.
    format_method : char(15) # Supported pose estimation method (deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap)
    ---
    pose_estimation_desc : varchar(1000) # Optional. Pose estimation method description with the supported formats.
    """

    # Each entry is [format_method, pose_estimation_desc].
    contents = [
        ["deeplabcut", "`.csv` and `.h5/.hdf5` files generated by DeepLabcut analysis"],
        ["sleap", "`.slp` and `.h5/.hdf5` files generated by SLEAP analysis"],
        ["anipose", "`.csv` files generated by anipose analysis"],
        ["sleap-anipose", "`.h5/.hdf5` files generated by sleap-anipose analysis"],
        ["nwb", "`.nwb` files with Neurodata Without Borders (NWB) format"],
        ["facemap", "`.h5` files generated by Facemap analysis"],
    ]


@schema
class KeypointSet(dj.Manual):
    """Table to store the keypoint data and video set directory to train the model.

    Attributes:
        kpset_id (int): Unique ID for each keypoint set.
        PoseEstimationMethod (foreign key): Format method used to obtain the keypoints data.
        kpset_config_dir (str): Path relative to root data directory where the config file is located.
        kpset_videos_dir (str): Path relative to root data directory where the videos and their keypoints are located.
        kpset_desc (str): Optional. User-entered description.
    """

    definition = """
    kpset_id : int # Unique ID for each keypoint set
    ---
    -> PoseEstimationMethod # Unique format method used to obtain the keypoints data
    kpset_config_dir : varchar(255) # Path relative to root data directory where the config file is located
    kpset_videos_dir : varchar(255) # Path relative to root data directory where the videos and their keypoints are located
    kpset_desc='' : varchar(300) # Optional. User-entered description
    """

    class VideoFile(dj.Part):
        """IDs and file paths of each video file that will be used to train the model.

        Attributes:
            KeypointSet (foreign key): Keypoint set this video belongs to.
            video_id (int): Unique ID for each video.
            video_path (str): Filepath of each video, relative to root data directory.
        """

        definition = """
        -> master
        video_id : int # Unique ID for each video
        ---
        video_path : varchar(1000) # Filepath of each video, relative to root data directory
        """


@schema
class Bodyparts(dj.Manual):
    """Table to store the body parts to use in the analysis.

    Attributes:
        KeypointSet (foreign key) : Unique ID for each keypoint set.
        bodyparts_id (int)        : Unique ID for each bodyparts selection within a keypoint set.
        bodyparts_desc (varchar)  : Optional. User-entered description.
        anterior_bodyparts (blob) : List of strings of anterior bodyparts.
        posterior_bodyparts (blob): List of strings of posterior bodyparts.
        use_bodyparts (blob)      : List of strings of bodyparts to be used.
    """

    definition = """
    -> KeypointSet # Unique ID for each keypoint set
    bodyparts_id : int # Unique ID for each bodypart
    ---
    bodyparts_desc='' : varchar(1000) # Optional. User-entered description.
    anterior_bodyparts : blob # List of strings of anterior bodyparts
    posterior_bodyparts : blob # List of strings of posterior bodyparts
    use_bodyparts : blob # List of strings of bodyparts to be used
    """
@schema
class PCATask(dj.Manual):
    """Staging table to define the PCA task and its output directory.

    Attributes:
        Bodyparts (foreign key)       : Bodyparts Key.
        kpms_project_output_dir (str) : KPMS's output directory relative to root.
    """

    definition = """
    -> Bodyparts # Unique ID for each Bodyparts key
    ---
    kpms_project_output_dir='' : varchar(255) # KPMS's output directory relative to root
    """


@schema
class LoadKeypointSet(dj.Imported):
    """Table to create the `kpms_project_output_dir`, and create and update the `config.yml` by creating a new `dj_config.yml`.

    Attributes:
        PCATask (foreign key)          : Unique ID for each PCATask.
        coordinates (longblob)         : Dictionary mapping filenames to keypoint coordinates as ndarrays of shape (n_frames, n_bodyparts, 2[or 3]).
        confidences (longblob)         : Dictionary mapping filenames to `likelihood` scores as ndarrays of shape (n_frames, n_bodyparts).
        formatted_bodyparts (longblob) : List of bodypart names. The order of the names matches the order of the bodyparts in `coordinates` and `confidences`.
        average_frame_rate (float)     : Average frame rate of the trained videos.
    """

    definition = """
    -> PCATask # Unique ID for each PCATask
    ---
    coordinates : longblob # Dictionary mapping filenames to keypoint coordinates as ndarrays of shape (n_frames, n_bodyparts, 2[or 3])
    confidences : longblob # Dictionary mapping filenames to `likelihood` scores as ndarrays of shape (n_frames, n_bodyparts)
    formatted_bodyparts : longblob # List of bodypart names. The order of the names matches the order of the bodyparts in `coordinates` and `confidences`.
    average_frame_rate : float # Average frame rate of the trained videos
    """

    def make(self, key):
        """Set up the KPMS project, load the training keypoints, and store them.

        Args:
            key (dict): Primary key from the PCATask table.

        Raises:
            NotImplementedError: `format_method` is only supported for `deeplabcut`.

        High-Level Logic:
            1. Fetch the bodyparts, the output directory, the format method, and
               the keypoint config and videoset directories; reject unsupported
               format methods before anything is written to disk.
            2. Call `setup_project` for `kpms_project_output_dir` using the
               DeepLabCut `config.yaml` found in the keypoint config directory.
            3. Load the generated config, update it with `video_dir` and the
               bodyparts, and write it out with `generate_kpms_dj_config`.
            4. Build the `filepath_patterns` that select the videos from
               `KeypointSet.VideoFile` as the training set.
            5. Load the keypoint coordinates and confidences for those videos.
            6. Compute the average frame rate of the training videos.
            7. Insert the results into the table.
        """
        from keypoint_moseq import setup_project, load_config, load_keypoints

        anterior_bodyparts, posterior_bodyparts, use_bodyparts = (
            Bodyparts & key
        ).fetch1(
            "anterior_bodyparts",
            "posterior_bodyparts",
            "use_bodyparts",
        )
        format_method, kpset_config_dir, kpset_videos_dir = (KeypointSet & key).fetch1(
            "format_method", "kpset_config_dir", "kpset_videos_dir"
        )
        # Fail fast: `setup_project` below is called with a DeepLabCut config,
        # so reject other formats before creating any project files.
        if format_method != "deeplabcut":
            raise NotImplementedError(
                "The currently supported format method is `deeplabcut`. "
                "If you require support for another format method, please "
                "reach out to us at `support at datajoint.com`."
            )

        # `get_kpms_processed_data_dir()` is annotated as returning a string;
        # wrapping it in `Path` makes the `/` join work for both str and Path.
        kpms_project_output_dir = Path(get_kpms_processed_data_dir()) / (
            PCATask & key
        ).fetch1("kpms_project_output_dir")

        file_paths = (KeypointSet.VideoFile & key).fetch("video_path")

        root_dirs = get_kpms_root_data_dir()
        kpset_config_dir = find_full_path(root_dirs, kpset_config_dir)
        kpset_videos_dir = find_full_path(root_dirs, kpset_videos_dir)

        setup_project(
            kpms_project_output_dir, deeplabcut_config=kpset_config_dir / "config.yaml"
        )

        kpms_config = load_config(
            kpms_project_output_dir.as_posix(), check_if_valid=True, build_indexes=False
        )
        kpms_config.update(
            video_dir=kpset_videos_dir.as_posix(),
            anterior_bodyparts=anterior_bodyparts,
            posterior_bodyparts=posterior_bodyparts,
            use_bodyparts=use_bodyparts,
        )
        generate_kpms_dj_config(kpms_project_output_dir.as_posix(), **kpms_config)

        # One glob pattern per video: "<videos dir>/<video file stem>*".
        filepath_patterns = [
            (
                kpset_videos_dir / (os.path.splitext(os.path.basename(path))[0] + "*")
            ).as_posix()
            for path in file_paths
        ]

        coordinates, confidences, formatted_bodyparts = load_keypoints(
            filepath_pattern=filepath_patterns, format=format_method
        )

        fps_list = []
        for video_path in file_paths:
            cap = cv2.VideoCapture(find_full_path(root_dirs, video_path).as_posix())
            # NOTE(review): the per-video fps and the mean are truncated to int
            # even though the column is a float (e.g. 29.97 -> 29) — confirm
            # whether this is intended.
            fps_list.append(int(cap.get(cv2.CAP_PROP_FPS)))
            cap.release()
        average_frame_rate = int(np.mean(fps_list))

        self.insert1(
            dict(
                **key,
                coordinates=coordinates,
                confidences=confidences,
                formatted_bodyparts=formatted_bodyparts,
                average_frame_rate=average_frame_rate,
            )
        )


@schema
class PCAFitting(dj.Computed):
    """Automated fitting of the PCA model.

    Attributes:
        LoadKeypointSet (foreign key) : LoadKeypointSet Key.
        pca_fitting_time (datetime)   : datetime of the PCA fitting analysis.
    """

    definition = """
    -> LoadKeypointSet # LoadKeypointSet Key
    ---
    pca_fitting_time=NULL : datetime # datetime of the PCA fitting analysis
    """

    def make(self, key):
        """Format the keypoint data, fit the PCA model, and save it in the KPMS output directory.

        Args:
            key (dict): LoadKeypointSet Key.

        High-Level Logic:
            1. Fetch the `kpms_project_output_dir` from the PCATask table.
            2. Load the `kpms_dj_config` and format the keypoint data with the
               stored coordinates and confidences.
            3. Fit the PCA model and save it with `save_pca` in the output
               directory.
            4. Insert the current UTC datetime as `pca_fitting_time`.
        """
        from keypoint_moseq import format_data, fit_pca, save_pca

        # `get_kpms_processed_data_dir()` is annotated as returning a string;
        # wrapping it in `Path` makes the `/` join work for both str and Path.
        kpms_project_output_dir = Path(get_kpms_processed_data_dir()) / (
            PCATask & key
        ).fetch1("kpms_project_output_dir")
        project_dir = kpms_project_output_dir.as_posix()

        kpms_dj_config = load_kpms_dj_config(
            project_dir, check_if_valid=True, build_indexes=True
        )
        coordinates, confidences = (LoadKeypointSet & key).fetch1(
            "coordinates", "confidences"
        )
        data, _ = format_data(
            **kpms_dj_config, coordinates=coordinates, confidences=confidences
        )

        pca = fit_pca(**data, **kpms_dj_config)
        save_pca(pca, project_dir)

        self.insert1(dict(**key, pca_fitting_time=datetime.now(timezone.utc)))
+ """ + + kpms_project_output_dir = (PCATask & key).fetch1("kpms_project_output_dir") + kpms_project_output_dir = ( + get_kpms_processed_data_dir() / kpms_project_output_dir + ) + + from keypoint_moseq import format_data, fit_pca, save_pca + + kpms_default_config = load_kpms_dj_config( + kpms_project_output_dir.as_posix(), check_if_valid=True, build_indexes=True + ) + coordinates, confidences = (LoadKeypointSet & key).fetch1( + "coordinates", "confidences" + ) + data, _ = format_data( + **kpms_default_config, coordinates=coordinates, confidences=confidences + ) + + pca = fit_pca(**data, **kpms_default_config) + save_pca(pca, kpms_project_output_dir.as_posix()) + + creation_datetime = datetime.now(timezone.utc) + self.insert1(dict(**key, pca_fitting_time=creation_datetime)) + + +@schema +class LatentDimension(dj.Imported): + """ + Automated computation to calculate the latent dimension as one of the autoregressive hyperparameters (`ar_hypparams`) \ + necessary for the model fitting. + The analysis aims to select each of the components that explain the 90% of variance (fixed threshold). + + Attributes: + PCAFitting (foreign key) : PCAFitting Key. + variance_percentage (float) : Variance threshold. Fixed value to 90%. + latent_dimension (int) : Number of principal components required to explain the specified variance. + latent_dim_desc (varchar) : Automated description of the computation result. + """ + + definition = """ + -> PCAFitting # PCAFitting Key + --- + variance_percentage : float # Variance threshold. Fixed value to 0.9 + latent_dimension : int # Number of principal components required to explain the specified variance. + latent_dim_desc : varchar(1000) # Automated description of the computation result. + """ + + def make(self, key): + """ + Make function to compute and store the latent dimensions that explain a 90% variance threshold. + + Args: + key (dict): PCAFitting Key. + + Raises: + + High-Level Logic: + 1. 
Fetches the output directory from the PCATask table and load the PCA model from the output directory. + 2. Set a specified variance threshold to 90% and compute the cumulative sum of the explained variance ratio. + 3. Determine the number of components required to explain the specified variance. + 3.1 If the cumulative sum of the explained variance ratio is less than the specified variance threshold, \ + it sets the `latent_dimension` to the total number of components and `variance_percentage` to the cumulative sum of the explained variance ratio. + 3.2 If the cumulative sum of the explained variance ratio is greater than or equal to the specified variance threshold, \ + it sets the `latent_dimension` to the number of components that explain the specified variance and `variance_percentage` to the specified variance threshold. + 4. Insert the results of this `make` function into the table. + """ + from keypoint_moseq import load_pca + + kpms_project_output_dir = (PCATask & key).fetch1("kpms_project_output_dir") + kpms_project_output_dir = ( + get_kpms_processed_data_dir() / kpms_project_output_dir + ) + + pca = load_pca(kpms_project_output_dir.as_posix()) + + variance_threshold = 0.90 + cs = np.cumsum( + pca.explained_variance_ratio_ + ) # explained_variance_ratio_: ndarray of shape (n_components,) + + if cs[-1] < variance_threshold: + latent_dimension = len(cs) + variance_percentage = cs[-1] * 100 + latent_dim_desc = ( + f"All components together only explain {cs[-1]*100}% of variance." + ) + else: + latent_dimension = (cs >= variance_threshold).nonzero()[0].min() + 1 # `>=` matches the branch condition, so the mask is never empty here + variance_percentage = variance_threshold * 100 + latent_dim_desc = f">={variance_threshold*100}% of variance explained by {(cs>=variance_threshold).nonzero()[0].min()+1} components." 
+ + self.insert1( + dict( + **key, + variance_percentage=variance_percentage, + latent_dimension=latent_dimension, + latent_dim_desc=latent_dim_desc, + ) + ) diff --git a/element_moseq/readers/kpms_reader.py b/element_moseq/readers/kpms_reader.py new file mode 100644 index 0000000..058fa27 --- /dev/null +++ b/element_moseq/readers/kpms_reader.py @@ -0,0 +1,186 @@ +import os +import logging +import yaml +import jax.numpy as jnp + +logger = logging.getLogger("datajoint") + + +def generate_kpms_dj_config(output_dir, **kwargs): + """This function mirrors the behavior of the `generate_config` function from the `keypoint_moseq` + package. Nonetheless, it produces a duplicate of the initial configuration file, titled + `kpms_dj_config.yml`, in the output directory to maintain the integrity of the original file. + This replicated file accommodates any customized project settings, with default configurations + utilized unless specified differently via keyword arguments. + + Args: + output_dir (str): Directory containing the `kpms_dj_config.yml` that will be generated. + kwargs (dict): Custom project settings. 
+ """ + + def _build_yaml(sections, comments): + text_blocks = [] + for title, data in sections: + centered_title = f" {title} ".center(50, "=") + text_blocks.append(f"\n\n{'#'}{centered_title}{'#'}") + for key, value in data.items(): + text = yaml.dump({key: value}).strip("\n") + if key in comments: + text = f"\n{'#'} {comments[key]}\n{text}" + text_blocks.append(text) + return "\n".join(text_blocks) + + def _update_dict(new, original): + return {k: new[k] if k in new else v for k, v in original.items()} + + hypperams = _update_dict( + kwargs, + { + "error_estimator": {"slope": -0.5, "intercept": 0.25}, + "obs_hypparams": { + "sigmasq_0": 0.1, + "sigmasq_C": 0.1, + "nu_sigma": 1e5, + "nu_s": 5, + }, + "ar_hypparams": { + "latent_dim": 10, + "nlags": 3, + "S_0_scale": 0.01, + "K_0_scale": 10.0, + }, + "trans_hypparams": { + "num_states": 100, + "gamma": 1e3, + "alpha": 5.7, + "kappa": 1e6, + }, + "cen_hypparams": {"sigmasq_loc": 0.5}, + }, + ) + + hypperams = {k: _update_dict(kwargs, v) for k, v in hypperams.items()} + + anatomy = _update_dict( + kwargs, + { + "bodyparts": ["BODYPART1", "BODYPART2", "BODYPART3"], + "use_bodyparts": ["BODYPART1", "BODYPART2", "BODYPART3"], + "skeleton": [ + ["BODYPART1", "BODYPART2"], + ["BODYPART2", "BODYPART3"], + ], + "anterior_bodyparts": ["BODYPART1"], + "posterior_bodyparts": ["BODYPART3"], + }, + ) + + other = _update_dict( + kwargs, + { + "recording_name_suffix": "", + "verbose": False, + "conf_pseudocount": 1e-3, + "video_dir": "", + "keypoint_colormap": "autumn", + "whiten": True, + "fix_heading": False, + "seg_length": 10000, + }, + ) + + fitting = _update_dict( + kwargs, + { + "added_noise_level": 0.1, + "PCA_fitting_num_frames": 1000000, + "conf_threshold": 0.5, + # 'kappa_scan_target_duration': 12, + # 'kappa_scan_min': 1e2, + # 'kappa_scan_max': 1e12, + # 'num_arhmm_scan_iters': 50, + # 'num_arhmm_final_iters': 200, + # 'num_kpslds_scan_iters': 50, + # 'num_kpslds_final_iters': 500 + }, + ) + + comments = { + 
"verbose": "whether to print progress messages during fitting", + "keypoint_colormap": "colormap used for visualization; see `matplotlib.cm.get_cmap` for options", + "added_noise_level": "upper bound of uniform noise added to the data during initial AR-HMM fitting; this is used to regularize the model", + "PCA_fitting_num_frames": "number of frames used to fit the PCA model during initialization", + "video_dir": "directory with videos from which keypoints were derived (used for crowd movies)", + "recording_name_suffix": "suffix used to match videos to recording names; this can usually be left empty (see `util.find_matching_videos` for details)", + "bodyparts": "used to access columns in the keypoint data", + "skeleton": "used for visualization only", + "use_bodyparts": "determines the subset of bodyparts to use for modeling and the order in which they are represented", + "anterior_bodyparts": "used to initialize heading", + "posterior_bodyparts": "used to initialize heading", + "seg_length": "data are broken up into segments to parallelize fitting", + "trans_hypparams": "transition hyperparameters", + "ar_hypparams": "autoregressive hyperparameters", + "obs_hypparams": "keypoint observation hyperparameters", + "cen_hypparams": "centroid movement hyperparameters", + "error_estimator": "parameters to convert neural net likelihoods to error size priors", + "save_every_n_iters": "frequency for saving model snapshots during fitting; if 0 only final state is saved", + "kappa_scan_target_duration": "target median syllable duration (in frames) for choosing kappa", + "whiten": "whether to whiten principal components; used to initialize the latent pose trajectory `x`", + "conf_threshold": "used to define outliers for interpolation when the model is initialized", + "conf_pseudocount": "pseudocount used regularize neural network confidences", + "fix_heading": "whether to keep the heading angle fixed; this should only be True if the pose is constrained to a narrow range of 
angles, e.g. a headfixed mouse.", + } + + sections = [ + ("ANATOMY", anatomy), + ("FITTING", fitting), + ("HYPER PARAMS", hypperams), + ("OTHER", other), + ] + + with open(os.path.join(output_dir, "kpms_dj_config.yml"), "w") as f: + f.write(_build_yaml(sections, comments)) + + +def load_kpms_dj_config(output_dir, check_if_valid=True, build_indexes=True): + """ + This function mirrors the functionality of the `load_config` function from the `keypoint_moseq` + package. Similarly, this function loads the `kpms_dj_config.yml` from the output directory. + + Args: + output_dir (str): Directory containing the `kpms_dj_config.yml` that will be loaded. + check_if_valid (bool): default=True. Check if the config is valid using :py:func:`keypoint_moseq.io.check_config_validity` + build_indexes (bool): default=True. Add keys `"anterior_idxs"` and `"posterior_idxs"` to the config. Each maps to a jax array indexing the elements of `config["anterior_bodyparts"]` and `config["posterior_bodyparts"]` by their order in `config["use_bodyparts"]` + + Returns: + kpms_dj_config (dict): configuration settings + """ + + from keypoint_moseq import check_config_validity + + config_path = os.path.join(output_dir, "kpms_dj_config.yml") + + with open(config_path, "r") as f: + kpms_dj_config = yaml.safe_load(f) + + if check_if_valid: + check_config_validity(kpms_dj_config) + + if build_indexes: + kpms_dj_config["anterior_idxs"] = jnp.array( + [ + kpms_dj_config["use_bodyparts"].index(bp) + for bp in kpms_dj_config["anterior_bodyparts"] + ] + ) + kpms_dj_config["posterior_idxs"] = jnp.array( + [ + kpms_dj_config["use_bodyparts"].index(bp) + for bp in kpms_dj_config["posterior_bodyparts"] + ] + ) + + if not "skeleton" in kpms_dj_config or kpms_dj_config["skeleton"] is None: + kpms_dj_config["skeleton"] = [] + + return kpms_dj_config diff --git a/element_moseq/version.py b/element_moseq/version.py new file mode 100644 index 0000000..652faa3 --- /dev/null +++ b/element_moseq/version.py @@ -0,0 +1,4 
@@ +""" +Package metadata +""" +__version__ = "0.1.0" diff --git a/images/flowchart.drawio b/images/flowchart.drawio new file mode 100644 index 0000000..c1b9bd3 --- /dev/null +++ b/images/flowchart.drawio @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/images/flowchart.svg b/images/flowchart.svg new file mode 100644 index 0000000..62cb73a --- /dev/null +++ b/images/flowchart.svg @@ -0,0 +1,4 @@ + + + +
Load keypoint data and body parts into pipeline
Load keypoint data and...
Synchronize data modalities & exploratory analysis
Synchronize data...
Visualize



 
Visualize...

 Export & publish

 
 
Export & publish...
Enter metadata
into pipeline
Enter metadata...
  Fit PCA, select latent dimension & kappa, 
and fit the model
Fit PCA, select laten...
 Film new videos & 
load metadata 
Film new videos &...
Pair videos with 
models & run 
inference
Pair videos with...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/images/pipeline.svg b/images/pipeline.svg new file mode 100644 index 0000000..f3ac42b --- /dev/null +++ b/images/pipeline.svg @@ -0,0 +1,294 @@ + + + + + +kpms_pca.PCAFitting + + +kpms_pca.PCAFitting + + + + + +kpms_model.FullFittingTask + + +kpms_model.FullFittingTask + + + + + +kpms_pca.PCAFitting->kpms_model.FullFittingTask + + + + +kpms_model.PreFittingTask + + +kpms_model.PreFittingTask + + + + + +kpms_pca.PCAFitting->kpms_model.PreFittingTask + + + + +kpms_pca.LatentDimension + + +kpms_pca.LatentDimension + + + + + +kpms_pca.PCAFitting->kpms_pca.LatentDimension + + + + +kpms_model.VideoRecording.File + + +kpms_model.VideoRecording.File + + + + + +kpms_model.PreFitting + + +kpms_model.PreFitting + + + + + +kpms_model.VideoRecording + + +kpms_model.VideoRecording + + + + + +kpms_model.VideoRecording->kpms_model.VideoRecording.File + + + + +kpms_model.InferenceTask + + +kpms_model.InferenceTask + + + + + +kpms_model.VideoRecording->kpms_model.InferenceTask + + + + +kpms_model.FullFitting + + +kpms_model.FullFitting + + + + + +kpms_model.FullFittingTask->kpms_model.FullFitting + + + + +kpms_pca.KeypointSet + + +kpms_pca.KeypointSet + + + + + +kpms_pca.KeypointSet.VideoFile + + +kpms_pca.KeypointSet.VideoFile + + + + + +kpms_pca.KeypointSet->kpms_pca.KeypointSet.VideoFile + + + + +kpms_pca.Bodyparts + + +kpms_pca.Bodyparts + + + + + +kpms_pca.KeypointSet->kpms_pca.Bodyparts + + + + +kpms_model.Model + + +kpms_model.Model + + + + + +kpms_model.Model->kpms_model.InferenceTask + + + + +kpms_model.Inference.GridMoviesSampledInstances + + +kpms_model.Inference.GridMoviesSampledInstances + + + + + +kpms_model.PreFittingTask->kpms_model.PreFitting + + + + +kpms_pca.PCATask + + +kpms_pca.PCATask + + + + + +kpms_pca.Bodyparts->kpms_pca.PCATask + + + + +kpms_model.Inference + + +kpms_model.Inference + + + + + +kpms_model.InferenceTask->kpms_model.Inference + + + + +kpms_pca.LoadKeypointSet + + +kpms_pca.LoadKeypointSet + + + + + 
+kpms_pca.PCATask->kpms_pca.LoadKeypointSet + + + + +subject.Subject + + +subject.Subject + + + + + +session.Session + + +session.Session + + + + + +subject.Subject->session.Session + + + + +session.Session->kpms_model.VideoRecording + + + + +kpms_pca.PoseEstimationMethod + + +kpms_pca.PoseEstimationMethod + + + + + +kpms_pca.PoseEstimationMethod->kpms_model.VideoRecording + + + + +kpms_pca.PoseEstimationMethod->kpms_pca.KeypointSet + + + + +kpms_pca.LoadKeypointSet->kpms_pca.PCAFitting + + + + +kpms_model.Inference->kpms_model.Inference.GridMoviesSampledInstances + + + + +kpms_model.Inference.MotionSequence + + +kpms_model.Inference.MotionSequence + + + + + +kpms_model.Inference->kpms_model.Inference.MotionSequence + + + + \ No newline at end of file diff --git a/images/pipeline_kpms_model.svg b/images/pipeline_kpms_model.svg new file mode 100644 index 0000000..b2cfe54 --- /dev/null +++ b/images/pipeline_kpms_model.svg @@ -0,0 +1,172 @@ + + + + + +kpms_model.VideoRecording + + +kpms_model.VideoRecording + + + + + +kpms_model.VideoRecording.File + + +kpms_model.VideoRecording.File + + + + + +kpms_model.VideoRecording->kpms_model.VideoRecording.File + + + + +kpms_model.InferenceTask + + +kpms_model.InferenceTask + + + + + +kpms_model.VideoRecording->kpms_model.InferenceTask + + + + +kpms_model.Model + + +kpms_model.Model + + + + + +kpms_model.Model->kpms_model.InferenceTask + + + + +kpms_model.Inference.GridMoviesSampledInstances + + +kpms_model.Inference.GridMoviesSampledInstances + + + + + +kpms_model.Inference + + +kpms_model.Inference + + + + + +kpms_model.Inference->kpms_model.Inference.GridMoviesSampledInstances + + + + +kpms_model.Inference.MotionSequence + + +kpms_model.Inference.MotionSequence + + + + + +kpms_model.Inference->kpms_model.Inference.MotionSequence + + + + +kpms_model.PreFittingTask + + +kpms_model.PreFittingTask + + + + + +kpms_model.PreFitting + + +kpms_model.PreFitting + + + + + +kpms_model.PreFittingTask->kpms_model.PreFitting + + + + 
+kpms_model.FullFittingTask + + +kpms_model.FullFittingTask + + + + + +kpms_model.FullFitting + + +kpms_model.FullFitting + + + + + +kpms_model.FullFittingTask->kpms_model.FullFitting + + + + +kpms_model.InferenceTask->kpms_model.Inference + + + + +subject.Subject + + +subject.Subject + + + + + +session.Session + + +session.Session + + + + + +subject.Subject->session.Session + + + + +session.Session->kpms_model.VideoRecording + + + + \ No newline at end of file diff --git a/images/pipeline_kpms_pca.svg b/images/pipeline_kpms_pca.svg new file mode 100644 index 0000000..4c8dbc0 --- /dev/null +++ b/images/pipeline_kpms_pca.svg @@ -0,0 +1,112 @@ + + + + + +kpms_pca.PCAFitting + + +kpms_pca.PCAFitting + + + + + +kpms_pca.LatentDimension + + +kpms_pca.LatentDimension + + + + + +kpms_pca.PCAFitting->kpms_pca.LatentDimension + + + + +kpms_pca.LoadKeypointSet + + +kpms_pca.LoadKeypointSet + + + + + +kpms_pca.LoadKeypointSet->kpms_pca.PCAFitting + + + + +kpms_pca.Bodyparts + + +kpms_pca.Bodyparts + + + + + +kpms_pca.PCATask + + +kpms_pca.PCATask + + + + + +kpms_pca.Bodyparts->kpms_pca.PCATask + + + + +kpms_pca.PCATask->kpms_pca.LoadKeypointSet + + + + +kpms_pca.KeypointSet + + +kpms_pca.KeypointSet + + + + + +kpms_pca.KeypointSet->kpms_pca.Bodyparts + + + + +kpms_pca.KeypointSet.VideoFile + + +kpms_pca.KeypointSet.VideoFile + + + + + +kpms_pca.KeypointSet->kpms_pca.KeypointSet.VideoFile + + + + +kpms_pca.PoseEstimationMethod + + +kpms_pca.PoseEstimationMethod + + + + + +kpms_pca.PoseEstimationMethod->kpms_pca.KeypointSet + + + + \ No newline at end of file diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb new file mode 100644 index 0000000..c5148c1 --- /dev/null +++ b/notebooks/tutorial.ipynb @@ -0,0 +1,5257 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DataJoint Element for Motion Sequencing with Keypoint-MoSeq\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Open-source Data 
Pipeline for Motion Sequencing in Neurophysiology**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Welcome to the tutorial for the DataJoint Element for motion sequencing analysis. This tutorial aims to provide a comprehensive understanding of the open-source data pipeline by `element-moseq`.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![pipeline](../images/flowchart.svg)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The package is designed to seamlessly integrate the **PCA fitting**, **model fitting** through **initialization**, **fitting an AR-HMM**, and **fitting the full keypoint-SLDS model** into a data pipeline and streamline model and video management using DataJoint.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![pipeline](../images/pipeline.svg)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By the end of this tutorial, you will have a clear grasp of how to set up and integrate the `Element MoSeq` into your specific research projects and your lab.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prerequisites\n", + "\n", + "Please see the [datajoint tutorials GitHub repository](https://github.com/datajoint/datajoint-tutorials/tree/main) before proceeding.\n", + "A basic understanding of the following DataJoint concepts will be beneficial to your understanding of this tutorial:\n", + "\n", + "1. The `Imported` and `Computed` table types in `datajoint-python`.\n", + "2. 
The functionality of the `.populate()` method.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Tutorial Overview**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Setup\n", + "- _Activate_ the DataJoint pipeline\n", + "- _Insert_ example data into subject and session tables\n", + "- _Insert_ the keypoint data from the pose estimation and the body parts in the DataJoint pipeline\n", + "- _Fit a PCA model_ to aligned and centered keypoint coordinates and _select_ the latent dimension\n", + "- _Fit the AR-HMM and Keypoint-SLDS Models_\n", + "- _Run the inference_ task and _visualize_ the results\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Setup**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial loads the keypoint data extracted by DeepLabCut of a single freely moving mouse in an open-field environment. The goal is to link this point tracking to pose dynamics by identifying its behavioral modules (\"syllables\") without human supervision. The modeling results are stored as a `.h5` file and a subdirectory of `.csv` files that contain the following information:\n", + "\n", + "- Behavior modules as \"syllables\": the syllable label assigned to each frame (i.e. the state indexes assigned by the model)\n", + "- Centroid and heading in each frame, as estimated by the model, that capture the animal's overall position in allocentric coordinates\n", + "- Latent state: low-dimensional representation of the animal's pose in each frame. These are similar to PCA scores, and are modified to reflect the pose dynamics and noise estimates inferred by the model.\n", + "\n", + "The results of this Element example can be combined with **other modalities** to create a complete customizable data pipeline for your specific lab or study. 
For instance, you can combine `element-moseq` with `element-deeplabcut` and `element-calcium-imaging` to characterize the neural activity along with natural sub-second rhythmicity in mouse movement.\n", + "\n", + "#### Steps to Run the Element-MoSeq\n", + "\n", + "The input data for this data pipeline is as follows:\n", + "\n", + "- A DeepLabCut (DLC) project folder with its configuration file as `.yaml` file, video set as `.mp4`, and keypoint tracking as `.h5` files.\n", + "- Selection of the anterior, posterior, and use bodyparts for the model fitting.\n", + "\n", + "This tutorial includes the keypoints example data in `example_data/inbox/dlc_project`.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's start this tutorial by importing the packages necessary to run the data pipeline.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "if os.path.basename(os.getcwd()) == \"notebooks\":\n", + " os.chdir(\"..\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import datajoint as dj\n", + "from pathlib import Path\n", + "import numpy as np\n", + "\n", + "from element_moseq.kpms_pca import get_kpms_root_data_dir, get_kpms_processed_data_dir \n", + "from element_interface.utils import find_full_path" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the tutorial is run in Codespaces, a private, local database server is created and made available for you. 
This is where we will insert and store our processed results.\n", + "\n", + "Let's connect to the database server.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-03-20 05:59:24,965][INFO]: Connecting root@localhost:3306\n", + "[2024-03-20 05:59:25,009][INFO]: Connected root@localhost:3306\n" + ] + }, + { + "data": { + "text/plain": [ + "DataJoint connection (connected) root@localhost:3306" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dj.conn()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Activate the DataJoint pipeline**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial presumes that the `element-moseq` has been pre-configured and instantiated, with the database linked downstream to pre-existing `subject` and `session` tables. Please refer to the `tutorial_pipeline.py` for the source code.\n", + "\n", + "Now, we will proceed to import the essential schemas required to construct this data pipeline, with particular attention to the primary components: `kpms_pca` and `kpms_model`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-03-20 05:59:41,609][WARNING]: lab.Project and related tables will be removed in a future version of Element Lab. 
Please use the project schema.\n" + ] + }, + { + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.3.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = false;\n var Bokeh = root.Bokeh;\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'plotly': 'https://cdn.plot.ly/plotly-2.18.0.min', 'tabulator': 'https://cdn.jsdelivr.net/npm/tabulator-tables@5.5.0/dist/js/tabulator', 'moment': 'https://cdn.jsdelivr.net/npm/luxon/build/global/luxon.min', 'jspanel': 
'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n require([\"plotly\"], function(Plotly) {\n\twindow.Plotly = Plotly\n\ton_load()\n })\n require([\"tabulator\"], function(Tabulator) {\n\twindow.Tabulator = Tabulator\n\ton_load()\n })\n require([\"moment\"], function(moment) {\n\twindow.moment = moment\n\ton_load()\n })\n require([\"jspanel\"], function(jsPanel) {\n\twindow.jsPanel = jsPanel\n\ton_load()\n })\n require([\"jspanel-modal\"], function() {\n\ton_load()\n })\n require([\"jspanel-tooltip\"], function() {\n\ton_load()\n })\n require([\"jspanel-hint\"], function() {\n\ton_load()\n })\n require([\"jspanel-layout\"], function() {\n\ton_load()\n })\n require([\"jspanel-contextmenu\"], function() {\n\ton_load()\n })\n require([\"jspanel-dock\"], function() {\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 12;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var 
existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } if (((window['Plotly'] !== undefined) && (!(window['Plotly'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/plotlyplot/plotly-2.18.0.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Tabulator'] !== undefined) && (!(window['Tabulator'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/datatabulator/tabulator-tables@5.5.0/dist/js/tabulator.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['moment'] !== undefined) && (!(window['moment'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/datatabulator/luxon/build/global/luxon.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 
'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < 
js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.holoviz.org/panel/1.3.4/dist/bundled/jquery/jquery.slim.min.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/bundled/plotlyplot/plotly-2.18.0.min.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/bundled/datatabulator/tabulator-tables@5.5.0/dist/js/tabulator.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/bundled/datatabulator/luxon/build/global/luxon.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.3.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.3.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.3.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.3.2.min.js\", 
\"https://cdn.holoviz.org/panel/1.3.4/dist/panel.min.js\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [\"https://cdn.holoviz.org/panel/1.3.4/dist/bundled/datatabulator/tabulator-tables@5.5.0/dist/css/tabulator_simple.min.css\"];\n var inline_js = [ function(Bokeh) {\n inject_raw_css(\".tabulator{position:relative;border:1px solid #999;font-size:14px;text-align:left;overflow:hidden;-webkit-transform:translateZ(0);-moz-transform:translateZ(0);-ms-transform:translateZ(0);-o-transform:translateZ(0);transform:translateZ(0)}.tabulator[tabulator-layout=fitDataFill] .tabulator-tableholder .tabulator-table{min-width:100%}.tabulator[tabulator-layout=fitDataTable]{display:inline-block}.tabulator.tabulator-block-select{user-select:none}.tabulator .tabulator-header{position:relative;box-sizing:border-box;width:100%;border-bottom:1px solid #999;background-color:#fff;color:#555;font-weight:700;white-space:nowrap;overflow:hidden;-moz-user-select:none;-khtml-user-select:none;-webkit-user-select:none;-o-user-select:none}.tabulator .tabulator-header.tabulator-header-hidden{display:none}.tabulator .tabulator-header .tabulator-header-contents{position:relative;overflow:hidden}.tabulator .tabulator-header .tabulator-header-contents .tabulator-headers{display:inline-block}.tabulator .tabulator-header .tabulator-col{display:inline-flex;position:relative;box-sizing:border-box;flex-direction:column;justify-content:flex-start;border-right:1px solid #ddd;background:#fff;text-align:left;vertical-align:bottom;overflow:hidden}.tabulator .tabulator-header .tabulator-col.tabulator-moving{position:absolute;border:1px solid #999;background:#e6e6e6;pointer-events:none}.tabulator .tabulator-header .tabulator-col .tabulator-col-content{box-sizing:border-box;position:relative;padding:4px}.tabulator .tabulator-header .tabulator-col .tabulator-col-content .tabulator-header-popup-button{padding:0 8px}.tabulator .tabulator-header .tabulator-col .tabulator-col-content 
.tabulator-header-popup-button:hover{cursor:pointer;opacity:.6}.tabulator .tabulator-header .tabulator-col .tabulator-col-content .tabulator-col-title-holder{position:relative}.tabulator .tabulator-header .tabulator-col .tabulator-col-content .tabulator-col-title{box-sizing:border-box;width:100%;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;vertical-align:bottom}.tabulator .tabulator-header .tabulator-col .tabulator-col-content .tabulator-col-title.tabulator-col-title-wrap{white-space:normal;text-overflow:clip}.tabulator .tabulator-header .tabulator-col .tabulator-col-content .tabulator-col-title .tabulator-title-editor{box-sizing:border-box;width:100%;border:1px solid #999;padding:1px;background:#fff}.tabulator .tabulator-header .tabulator-col .tabulator-col-content .tabulator-col-title .tabulator-header-popup-button+.tabulator-title-editor{width:calc(100% - 22px)}.tabulator .tabulator-header .tabulator-col .tabulator-col-content .tabulator-col-sorter{display:flex;align-items:center;position:absolute;top:0;bottom:0;right:4px}.tabulator .tabulator-header .tabulator-col .tabulator-col-content .tabulator-col-sorter .tabulator-arrow{width:0;height:0;border-left:6px solid transparent;border-right:6px solid transparent;border-bottom:6px solid #bbb}.tabulator .tabulator-header .tabulator-col.tabulator-col-group .tabulator-col-group-cols{position:relative;display:flex;border-top:1px solid #ddd;overflow:hidden;margin-right:-1px}.tabulator .tabulator-header .tabulator-col .tabulator-header-filter{position:relative;box-sizing:border-box;margin-top:2px;width:100%;text-align:center}.tabulator .tabulator-header .tabulator-col .tabulator-header-filter textarea{height:auto!important}.tabulator .tabulator-header .tabulator-col .tabulator-header-filter svg{margin-top:3px}.tabulator .tabulator-header .tabulator-col .tabulator-header-filter input::-ms-clear{width:0;height:0}.tabulator .tabulator-header .tabulator-col.tabulator-sortable 
.tabulator-col-title{padding-right:25px}@media (hover:hover) and (pointer:fine){.tabulator .tabulator-header .tabulator-col.tabulator-sortable.tabulator-col-sorter-element:hover{cursor:pointer;background-color:#e6e6e6}}.tabulator .tabulator-header .tabulator-col.tabulator-sortable[aria-sort=none] .tabulator-col-content .tabulator-col-sorter{color:#bbb}@media (hover:hover) and (pointer:fine){.tabulator .tabulator-header .tabulator-col.tabulator-sortable[aria-sort=none] .tabulator-col-content .tabulator-col-sorter.tabulator-col-sorter-element .tabulator-arrow:hover{cursor:pointer;border-bottom:6px solid #555}}.tabulator .tabulator-header .tabulator-col.tabulator-sortable[aria-sort=none] .tabulator-col-content .tabulator-col-sorter .tabulator-arrow{border-top:none;border-bottom:6px solid #bbb}.tabulator .tabulator-header .tabulator-col.tabulator-sortable[aria-sort=ascending] .tabulator-col-content .tabulator-col-sorter{color:#666}@media (hover:hover) and (pointer:fine){.tabulator .tabulator-header .tabulator-col.tabulator-sortable[aria-sort=ascending] .tabulator-col-content .tabulator-col-sorter.tabulator-col-sorter-element .tabulator-arrow:hover{cursor:pointer;border-bottom:6px solid #555}}.tabulator .tabulator-header .tabulator-col.tabulator-sortable[aria-sort=ascending] .tabulator-col-content .tabulator-col-sorter .tabulator-arrow{border-top:none;border-bottom:6px solid #666}.tabulator .tabulator-header .tabulator-col.tabulator-sortable[aria-sort=descending] .tabulator-col-content .tabulator-col-sorter{color:#666}@media (hover:hover) and (pointer:fine){.tabulator .tabulator-header .tabulator-col.tabulator-sortable[aria-sort=descending] .tabulator-col-content .tabulator-col-sorter.tabulator-col-sorter-element .tabulator-arrow:hover{cursor:pointer;border-top:6px solid #555}}.tabulator .tabulator-header .tabulator-col.tabulator-sortable[aria-sort=descending] .tabulator-col-content .tabulator-col-sorter .tabulator-arrow{border-bottom:none;border-top:6px solid 
#666;color:#666}.tabulator .tabulator-header .tabulator-col.tabulator-col-vertical .tabulator-col-content .tabulator-col-title{writing-mode:vertical-rl;text-orientation:mixed;display:flex;align-items:center;justify-content:center}.tabulator .tabulator-header .tabulator-col.tabulator-col-vertical.tabulator-col-vertical-flip .tabulator-col-title{transform:rotate(180deg)}.tabulator .tabulator-header .tabulator-col.tabulator-col-vertical.tabulator-sortable .tabulator-col-title{padding-right:0;padding-top:20px}.tabulator .tabulator-header .tabulator-col.tabulator-col-vertical.tabulator-sortable.tabulator-col-vertical-flip .tabulator-col-title{padding-right:0;padding-bottom:20px}.tabulator .tabulator-header .tabulator-col.tabulator-col-vertical.tabulator-sortable .tabulator-col-sorter{justify-content:center;left:0;right:0;top:4px;bottom:auto}.tabulator .tabulator-header .tabulator-frozen{position:sticky;left:0;z-index:10}.tabulator .tabulator-header .tabulator-frozen.tabulator-frozen-left{border-right:2px solid #ddd}.tabulator .tabulator-header .tabulator-frozen.tabulator-frozen-right{border-left:2px solid #ddd}.tabulator .tabulator-header .tabulator-calcs-holder{box-sizing:border-box;background:#fff!important;border-top:1px solid #ddd;border-bottom:1px solid #ddd}.tabulator .tabulator-header .tabulator-calcs-holder .tabulator-row{background:#fff!important}.tabulator .tabulator-header .tabulator-calcs-holder .tabulator-row .tabulator-col-resize-handle,.tabulator .tabulator-header .tabulator-frozen-rows-holder:empty{display:none}.tabulator .tabulator-tableholder{position:relative;width:100%;white-space:nowrap;overflow:auto;-webkit-overflow-scrolling:touch}.tabulator .tabulator-tableholder:focus{outline:none}.tabulator .tabulator-tableholder .tabulator-placeholder{box-sizing:border-box;display:flex;align-items:center;justify-content:center;width:100%}.tabulator .tabulator-tableholder 
.tabulator-placeholder[tabulator-render-mode=virtual]{min-height:100%;min-width:100%}.tabulator .tabulator-tableholder .tabulator-placeholder .tabulator-placeholder-contents{display:inline-block;text-align:center;padding:10px;color:#ccc;font-weight:700;font-size:20px;white-space:normal}.tabulator .tabulator-tableholder .tabulator-table{position:relative;display:inline-block;background-color:#fff;white-space:nowrap;overflow:visible;color:#333}.tabulator .tabulator-tableholder .tabulator-table .tabulator-row.tabulator-calcs{font-weight:700;background:#f2f2f2!important}.tabulator .tabulator-tableholder .tabulator-table .tabulator-row.tabulator-calcs.tabulator-calcs-top{border-bottom:2px solid #ddd}.tabulator .tabulator-tableholder .tabulator-table .tabulator-row.tabulator-calcs.tabulator-calcs-bottom{border-top:2px solid #ddd}.tabulator .tabulator-footer{border-top:1px solid #999;background-color:#fff;color:#555;font-weight:700;white-space:nowrap;user-select:none;-moz-user-select:none;-khtml-user-select:none;-webkit-user-select:none;-o-user-select:none}.tabulator .tabulator-footer .tabulator-footer-contents{display:flex;flex-direction:row;align-items:center;justify-content:space-between;padding:5px 10px}.tabulator .tabulator-footer .tabulator-footer-contents:empty{display:none}.tabulator .tabulator-footer .tabulator-calcs-holder{box-sizing:border-box;width:100%;text-align:left;background:#fff!important;border-bottom:1px solid #ddd;border-top:1px solid #ddd;overflow:hidden}.tabulator .tabulator-footer .tabulator-calcs-holder .tabulator-row{display:inline-block;background:#fff!important}.tabulator .tabulator-footer .tabulator-calcs-holder .tabulator-row .tabulator-col-resize-handle{display:none}.tabulator .tabulator-footer .tabulator-calcs-holder:only-child{margin-bottom:-5px;border-bottom:none}.tabulator .tabulator-footer>*+.tabulator-page-counter{margin-left:10px}.tabulator .tabulator-footer .tabulator-page-counter{font-weight:400}.tabulator .tabulator-footer 
.tabulator-paginator{flex:1;text-align:right;color:#555;font-family:inherit;font-weight:inherit;font-size:inherit}.tabulator .tabulator-footer .tabulator-page-size{display:inline-block;margin:0 5px;padding:2px 5px;border:1px solid #aaa;border-radius:3px}.tabulator .tabulator-footer .tabulator-pages{margin:0 7px}.tabulator .tabulator-footer .tabulator-page{display:inline-block;margin:0 2px;padding:2px 5px;border:1px solid #aaa;border-radius:3px;background:hsla(0,0%,100%,.2)}.tabulator .tabulator-footer .tabulator-page.active{color:#d00}.tabulator .tabulator-footer .tabulator-page:disabled{opacity:.5}@media (hover:hover) and (pointer:fine){.tabulator .tabulator-footer .tabulator-page:not(.disabled):hover{cursor:pointer;background:rgba(0,0,0,.2);color:#fff}}.tabulator .tabulator-col-resize-handle{position:relative;display:inline-block;width:6px;margin-left:-3px;margin-right:-3px;z-index:10;vertical-align:middle}@media (hover:hover) and (pointer:fine){.tabulator .tabulator-col-resize-handle:hover{cursor:ew-resize}}.tabulator .tabulator-col-resize-handle:last-of-type{width:3px;margin-right:0}.tabulator .tabulator-alert{position:absolute;display:flex;align-items:center;top:0;left:0;z-index:100;height:100%;width:100%;background:rgba(0,0,0,.4);text-align:center}.tabulator .tabulator-alert .tabulator-alert-msg{display:inline-block;margin:0 auto;padding:10px 20px;border-radius:10px;background:#fff;font-weight:700;font-size:16px}.tabulator .tabulator-alert .tabulator-alert-msg.tabulator-alert-state-msg{border:4px solid #333;color:#000}.tabulator .tabulator-alert .tabulator-alert-msg.tabulator-alert-state-error{border:4px solid #d00;color:#590000}.tabulator-row{position:relative;box-sizing:border-box;min-height:22px}.tabulator-row,.tabulator-row.tabulator-row-even{background-color:#fff}@media (hover:hover) and (pointer:fine){.tabulator-row.tabulator-selectable:hover{background-color:#bbb;cursor:pointer}}.tabulator-row.tabulator-selected{background-color:#9abcea}@media 
(hover:hover) and (pointer:fine){.tabulator-row.tabulator-selected:hover{background-color:#769bcc;cursor:pointer}}.tabulator-row.tabulator-row-moving{border:1px solid #000;background:#fff}.tabulator-row.tabulator-moving{position:absolute;border-top:1px solid #ddd;border-bottom:1px solid #ddd;pointer-events:none;z-index:15}.tabulator-row .tabulator-row-resize-handle{position:absolute;right:0;bottom:0;left:0;height:5px}.tabulator-row .tabulator-row-resize-handle.prev{top:0;bottom:auto}@media (hover:hover) and (pointer:fine){.tabulator-row .tabulator-row-resize-handle:hover{cursor:ns-resize}}.tabulator-row .tabulator-responsive-collapse{box-sizing:border-box;padding:5px;border-top:1px solid #ddd;border-bottom:1px solid #ddd}.tabulator-row .tabulator-responsive-collapse:empty{display:none}.tabulator-row .tabulator-responsive-collapse table{font-size:14px}.tabulator-row .tabulator-responsive-collapse table tr td{position:relative}.tabulator-row .tabulator-responsive-collapse table tr td:first-of-type{padding-right:10px}.tabulator-row .tabulator-cell{display:inline-block;position:relative;box-sizing:border-box;padding:4px;border-right:1px solid #ddd;vertical-align:middle;white-space:nowrap;overflow:hidden;text-overflow:ellipsis}.tabulator-row .tabulator-cell.tabulator-frozen{display:inline-block;position:sticky;left:0;background-color:inherit;z-index:10}.tabulator-row .tabulator-cell.tabulator-frozen.tabulator-frozen-left{border-right:2px solid #ddd}.tabulator-row .tabulator-cell.tabulator-frozen.tabulator-frozen-right{border-left:2px solid #ddd}.tabulator-row .tabulator-cell.tabulator-editing{border:1px solid #1d68cd;outline:none;padding:0}.tabulator-row .tabulator-cell.tabulator-editing input,.tabulator-row .tabulator-cell.tabulator-editing select{border:1px;background:transparent;outline:none}.tabulator-row .tabulator-cell.tabulator-validation-fail{border:1px solid #d00}.tabulator-row .tabulator-cell.tabulator-validation-fail input,.tabulator-row 
.tabulator-cell.tabulator-validation-fail select{border:1px;background:transparent;color:#d00}.tabulator-row .tabulator-cell.tabulator-row-handle{display:inline-flex;align-items:center;justify-content:center;-moz-user-select:none;-khtml-user-select:none;-webkit-user-select:none;-o-user-select:none}.tabulator-row .tabulator-cell.tabulator-row-handle .tabulator-row-handle-box{width:80%}.tabulator-row .tabulator-cell.tabulator-row-handle .tabulator-row-handle-box .tabulator-row-handle-bar{width:100%;height:3px;margin-top:2px;background:#666}.tabulator-row .tabulator-cell .tabulator-data-tree-branch{display:inline-block;vertical-align:middle;height:9px;width:7px;margin-top:-9px;margin-right:5px;border-bottom-left-radius:1px;border-left:2px solid #ddd;border-bottom:2px solid #ddd}.tabulator-row .tabulator-cell .tabulator-data-tree-control{display:inline-flex;justify-content:center;align-items:center;vertical-align:middle;height:11px;width:11px;margin-right:5px;border:1px solid #333;border-radius:2px;background:rgba(0,0,0,.1);overflow:hidden}@media (hover:hover) and (pointer:fine){.tabulator-row .tabulator-cell .tabulator-data-tree-control:hover{cursor:pointer;background:rgba(0,0,0,.2)}}.tabulator-row .tabulator-cell .tabulator-data-tree-control .tabulator-data-tree-control-collapse{display:inline-block;position:relative;height:7px;width:1px;background:transparent}.tabulator-row .tabulator-cell .tabulator-data-tree-control .tabulator-data-tree-control-collapse:after{position:absolute;content:\\\"\\\";left:-3px;top:3px;height:1px;width:7px;background:#333}.tabulator-row .tabulator-cell .tabulator-data-tree-control .tabulator-data-tree-control-expand{display:inline-block;position:relative;height:7px;width:1px;background:#333}.tabulator-row .tabulator-cell .tabulator-data-tree-control .tabulator-data-tree-control-expand:after{position:absolute;content:\\\"\\\";left:-3px;top:3px;height:1px;width:7px;background:#333}.tabulator-row .tabulator-cell 
.tabulator-responsive-collapse-toggle{display:inline-flex;align-items:center;justify-content:center;-moz-user-select:none;-khtml-user-select:none;-webkit-user-select:none;-o-user-select:none;height:15px;width:15px;border-radius:20px;background:#666;color:#fff;font-weight:700;font-size:1.1em}@media (hover:hover) and (pointer:fine){.tabulator-row .tabulator-cell .tabulator-responsive-collapse-toggle:hover{opacity:.7;cursor:pointer}}.tabulator-row .tabulator-cell .tabulator-responsive-collapse-toggle.open .tabulator-responsive-collapse-toggle-close{display:initial}.tabulator-row .tabulator-cell .tabulator-responsive-collapse-toggle.open .tabulator-responsive-collapse-toggle-open{display:none}.tabulator-row .tabulator-cell .tabulator-responsive-collapse-toggle svg{stroke:#fff}.tabulator-row .tabulator-cell .tabulator-responsive-collapse-toggle .tabulator-responsive-collapse-toggle-close{display:none}.tabulator-row .tabulator-cell .tabulator-traffic-light{display:inline-block;height:14px;width:14px;border-radius:14px}.tabulator-row.tabulator-group{box-sizing:border-box;border-bottom:1px solid #999;border-right:1px solid #ddd;border-top:1px solid #999;padding:5px 5px 5px 10px;background:#ccc;font-weight:700;min-width:100%}@media (hover:hover) and (pointer:fine){.tabulator-row.tabulator-group:hover{cursor:pointer;background-color:rgba(0,0,0,.1)}}.tabulator-row.tabulator-group.tabulator-group-visible .tabulator-arrow{margin-right:10px;border-left:6px solid transparent;border-right:6px solid transparent;border-top:6px solid #666;border-bottom:0}.tabulator-row.tabulator-group.tabulator-group-level-1{padding-left:30px}.tabulator-row.tabulator-group.tabulator-group-level-2{padding-left:50px}.tabulator-row.tabulator-group.tabulator-group-level-3{padding-left:70px}.tabulator-row.tabulator-group.tabulator-group-level-4{padding-left:90px}.tabulator-row.tabulator-group.tabulator-group-level-5{padding-left:110px}.tabulator-row.tabulator-group 
.tabulator-group-toggle{display:inline-block}.tabulator-row.tabulator-group .tabulator-arrow{display:inline-block;width:0;height:0;margin-right:16px;border-top:6px solid transparent;border-bottom:6px solid transparent;border-right:0;border-left:6px solid #666;vertical-align:middle}.tabulator-row.tabulator-group span{margin-left:10px;color:#d00}.tabulator-popup-container{position:absolute;display:inline-block;box-sizing:border-box;background:#fff;border:1px solid #ddd;box-shadow:0 0 5px 0 rgba(0,0,0,.2);font-size:14px;overflow-y:auto;-webkit-overflow-scrolling:touch;z-index:10000}.tabulator-popup{padding:5px;border-radius:3px}.tabulator-tooltip{max-width:Min(500px,100%);padding:3px 5px;border-radius:2px;box-shadow:none;font-size:12px;pointer-events:none}.tabulator-menu .tabulator-menu-item{position:relative;box-sizing:border-box;padding:5px 10px;user-select:none}.tabulator-menu .tabulator-menu-item.tabulator-menu-item-disabled{opacity:.5}@media (hover:hover) and (pointer:fine){.tabulator-menu .tabulator-menu-item:not(.tabulator-menu-item-disabled):hover{cursor:pointer;background:#fff}}.tabulator-menu .tabulator-menu-item.tabulator-menu-item-submenu{padding-right:25px}.tabulator-menu .tabulator-menu-item.tabulator-menu-item-submenu:after{display:inline-block;position:absolute;top:calc(5px + .4em);right:10px;height:7px;width:7px;content:\\\"\\\";border-color:#ddd;border-style:solid;border-width:1px 1px 0 0;vertical-align:top;transform:rotate(45deg)}.tabulator-menu .tabulator-menu-separator{border-top:1px solid #ddd}.tabulator-edit-list{max-height:200px;font-size:14px;overflow-y:auto;-webkit-overflow-scrolling:touch}.tabulator-edit-list .tabulator-edit-list-item{padding:4px;color:#333;outline:none}.tabulator-edit-list .tabulator-edit-list-item.active{color:#fff;background:#1d68cd}.tabulator-edit-list .tabulator-edit-list-item.active.focused{outline:1px solid hsla(0,0%,100%,.5)}.tabulator-edit-list .tabulator-edit-list-item.focused{outline:1px solid #1d68cd}@media 
(hover:hover) and (pointer:fine){.tabulator-edit-list .tabulator-edit-list-item:hover{cursor:pointer;color:#fff;background:#1d68cd}}.tabulator-edit-list .tabulator-edit-list-placeholder{padding:4px;color:#333;text-align:center}.tabulator-edit-list .tabulator-edit-list-group{border-bottom:1px solid #ddd;padding:6px 4px 4px;color:#333;font-weight:700}.tabulator-edit-list .tabulator-edit-list-group.tabulator-edit-list-group-level-2,.tabulator-edit-list .tabulator-edit-list-item.tabulator-edit-list-group-level-2{padding-left:12px}.tabulator-edit-list .tabulator-edit-list-group.tabulator-edit-list-group-level-3,.tabulator-edit-list .tabulator-edit-list-item.tabulator-edit-list-group-level-3{padding-left:20px}.tabulator-edit-list .tabulator-edit-list-group.tabulator-edit-list-group-level-4,.tabulator-edit-list .tabulator-edit-list-item.tabulator-edit-list-group-level-4{padding-left:28px}.tabulator-edit-list .tabulator-edit-list-group.tabulator-edit-list-group-level-5,.tabulator-edit-list .tabulator-edit-list-item.tabulator-edit-list-group-level-5{padding-left:36px}.tabulator.tabulator-ltr{direction:ltr}.tabulator.tabulator-rtl{text-align:initial;direction:rtl}.tabulator.tabulator-rtl .tabulator-header .tabulator-col{text-align:initial;border-left:1px solid #ddd;border-right:initial}.tabulator.tabulator-rtl .tabulator-header .tabulator-col.tabulator-col-group .tabulator-col-group-cols{margin-right:0;margin-left:-1px}.tabulator.tabulator-rtl .tabulator-header .tabulator-col.tabulator-sortable .tabulator-col-title{padding-right:0;padding-left:25px}.tabulator.tabulator-rtl .tabulator-header .tabulator-col .tabulator-col-content .tabulator-col-sorter{left:8px;right:auto}.tabulator.tabulator-rtl .tabulator-row .tabulator-cell{border-right:initial;border-left:1px solid #ddd}.tabulator.tabulator-rtl .tabulator-row .tabulator-cell 
.tabulator-data-tree-branch{margin-right:0;margin-left:5px;border-bottom-left-radius:0;border-bottom-right-radius:1px;border-left:initial;border-right:2px solid #ddd}.tabulator.tabulator-rtl .tabulator-row .tabulator-cell .tabulator-data-tree-control{margin-right:0;margin-left:5px}.tabulator.tabulator-rtl .tabulator-row .tabulator-cell.tabulator-frozen.tabulator-frozen-left{border-left:2px solid #ddd}.tabulator.tabulator-rtl .tabulator-row .tabulator-cell.tabulator-frozen.tabulator-frozen-right{border-right:2px solid #ddd}.tabulator.tabulator-rtl .tabulator-row .tabulator-col-resize-handle:last-of-type{width:3px;margin-left:0;margin-right:-3px}.tabulator.tabulator-rtl .tabulator-footer .tabulator-calcs-holder{text-align:initial}.tabulator-print-fullscreen{position:absolute;top:0;bottom:0;left:0;right:0;z-index:10000}body.tabulator-print-fullscreen-hide>:not(.tabulator-print-fullscreen){display:none!important}.tabulator-print-table{border-collapse:collapse}.tabulator-print-table .tabulator-data-tree-branch{display:inline-block;vertical-align:middle;height:9px;width:7px;margin-top:-9px;margin-right:5px;border-bottom-left-radius:1px;border-left:2px solid #ddd;border-bottom:2px solid #ddd}.tabulator-print-table .tabulator-print-table-group{box-sizing:border-box;border-bottom:1px solid #999;border-right:1px solid #ddd;border-top:1px solid #999;padding:5px 5px 5px 10px;background:#ccc;font-weight:700;min-width:100%}@media (hover:hover) and (pointer:fine){.tabulator-print-table .tabulator-print-table-group:hover{cursor:pointer;background-color:rgba(0,0,0,.1)}}.tabulator-print-table .tabulator-print-table-group.tabulator-group-visible .tabulator-arrow{margin-right:10px;border-left:6px solid transparent;border-right:6px solid transparent;border-top:6px solid #666;border-bottom:0}.tabulator-print-table .tabulator-print-table-group.tabulator-group-level-1 td{padding-left:30px!important}.tabulator-print-table .tabulator-print-table-group.tabulator-group-level-2 
td{padding-left:50px!important}.tabulator-print-table .tabulator-print-table-group.tabulator-group-level-3 td{padding-left:70px!important}.tabulator-print-table .tabulator-print-table-group.tabulator-group-level-4 td{padding-left:90px!important}.tabulator-print-table .tabulator-print-table-group.tabulator-group-level-5 td{padding-left:110px!important}.tabulator-print-table .tabulator-print-table-group .tabulator-group-toggle{display:inline-block}.tabulator-print-table .tabulator-print-table-group .tabulator-arrow{display:inline-block;width:0;height:0;margin-right:16px;border-top:6px solid transparent;border-bottom:6px solid transparent;border-right:0;border-left:6px solid #666;vertical-align:middle}.tabulator-print-table .tabulator-print-table-group span{color:#d00}.tabulator-print-table .tabulator-data-tree-control{display:inline-flex;justify-content:center;align-items:center;vertical-align:middle;height:11px;width:11px;margin-right:5px;border:1px solid #333;border-radius:2px;background:rgba(0,0,0,.1);overflow:hidden}@media (hover:hover) and (pointer:fine){.tabulator-print-table .tabulator-data-tree-control:hover{cursor:pointer;background:rgba(0,0,0,.2)}}.tabulator-print-table .tabulator-data-tree-control .tabulator-data-tree-control-collapse{display:inline-block;position:relative;height:7px;width:1px;background:transparent}.tabulator-print-table .tabulator-data-tree-control .tabulator-data-tree-control-collapse:after{position:absolute;content:\\\"\\\";left:-3px;top:3px;height:1px;width:7px;background:#333}.tabulator-print-table .tabulator-data-tree-control .tabulator-data-tree-control-expand{display:inline-block;position:relative;height:7px;width:1px;background:#333}.tabulator-print-table .tabulator-data-tree-control .tabulator-data-tree-control-expand:after{position:absolute;content:\\\"\\\";left:-3px;top:3px;height:1px;width:7px;background:#333}.tabulator{border:none;background-color:#fff}.tabulator .tabulator-header 
.tabulator-calcs-holder{background:#f2f2f2!important;border-bottom:1px solid #999}.tabulator .tabulator-header .tabulator-calcs-holder .tabulator-row{background:#f2f2f2!important}.tabulator .tabulator-tableholder .tabulator-placeholder span{color:#000}.tabulator .tabulator-footer .tabulator-calcs-holder{background:#f2f2f2!important;border-bottom:1px solid #fff}.tabulator .tabulator-footer .tabulator-calcs-holder .tabulator-row{background:#f2f2f2!important}.tabulator-row{border-bottom:1px solid #ddd}.tabulator-row .tabulator-cell:last-of-type{border-right:none}.tabulator-row.tabulator-group span{color:#666}.tabulator-print-table .tabulator-print-table-group span{margin-left:10px;color:#666}\\n/*# sourceMappingURL=tabulator_simple.min.css.map */\");\n }, function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt 
to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));", + "application/vnd.holoviews_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) 
{\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: 
message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { 
newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, 
handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n", + "application/vnd.holoviews_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ] + }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "ce094051-92c7-46b7-8716-4784ac4589f1" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "from tutorial_pipeline import lab, subject, session, kpms_pca, kpms_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can represent the tables in the `kpms_pca` and `kpms_model` schemas as well as some of the upstream dependencies to `session` and `subject` schemas as a diagram.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Model\n", + "\n", + "\n", + "kpms_model.Model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.InferenceTask\n", + "\n", + "\n", + "kpms_model.InferenceTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Model->kpms_model.InferenceTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.LoadKeypointSet\n", + "\n", + "\n", + "kpms_pca.LoadKeypointSet\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCAFitting\n", + "\n", + "\n", + "kpms_pca.PCAFitting\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.LoadKeypointSet->kpms_pca.PCAFitting\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PoseEstimationMethod\n", + "\n", + "\n", + "kpms_pca.PoseEstimationMethod\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.KeypointSet\n", + "\n", + "\n", + "kpms_pca.KeypointSet\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PoseEstimationMethod->kpms_pca.KeypointSet\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording\n", + "\n", + "\n", + "kpms_model.VideoRecording\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PoseEstimationMethod->kpms_model.VideoRecording\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.KeypointSet.VideoFile\n", + "\n", + "\n", + "kpms_pca.KeypointSet.VideoFile\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"kpms_pca.KeypointSet->kpms_pca.KeypointSet.VideoFile\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.Bodyparts\n", + "\n", + "\n", + "kpms_pca.Bodyparts\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.KeypointSet->kpms_pca.Bodyparts\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.PreFittingTask\n", + "\n", + "\n", + "kpms_model.PreFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.PreFitting\n", + "\n", + "\n", + "kpms_model.PreFitting\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.PreFittingTask->kpms_model.PreFitting\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCAFitting->kpms_model.PreFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.FullFittingTask\n", + "\n", + "\n", + "kpms_model.FullFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCAFitting->kpms_model.FullFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.LatentDimension\n", + "\n", + "\n", + "kpms_pca.LatentDimension\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCAFitting->kpms_pca.LatentDimension\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject\n", + "\n", + "\n", + "subject.Subject\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "session.Session\n", + "\n", + "\n", + "session.Session\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->session.Session\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference\n", + "\n", + "\n", + "kpms_model.Inference\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference.GridMoviesSampledInstances\n", + "\n", + "\n", + "kpms_model.Inference.GridMoviesSampledInstances\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference->kpms_model.Inference.GridMoviesSampledInstances\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference.MotionSequence\n", + "\n", + "\n", + "kpms_model.Inference.MotionSequence\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference->kpms_model.Inference.MotionSequence\n", + "\n", + "\n", + "\n", + 
"\n", + "kpms_model.FullFitting\n", + "\n", + "\n", + "kpms_model.FullFitting\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording.File\n", + "\n", + "\n", + "kpms_model.VideoRecording.File\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCATask\n", + "\n", + "\n", + "kpms_pca.PCATask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.Bodyparts->kpms_pca.PCATask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.FullFittingTask->kpms_model.FullFitting\n", + "\n", + "\n", + "\n", + "\n", + "session.Session->kpms_model.VideoRecording\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCATask->kpms_pca.LoadKeypointSet\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording->kpms_model.VideoRecording.File\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording->kpms_model.InferenceTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.InferenceTask->kpms_model.Inference\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " dj.Diagram(subject.Subject)\n", + " + dj.Diagram(session.Session)\n", + " + dj.Diagram(kpms_pca)\n", + " + dj.Diagram(kpms_model)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As evident from the diagram, this data pipeline encompasses several tables associated with different keypoint-MoSeq components like pca, pre-fitting of AR-HMM, and full fitting of the model. A few tables, such as `subject.Subject` or `session.Session`, while important for a complete pipeline, fall outside the scope of the `element-moseq` tutorial, and will therefore, not be explored extensively here. 
The primary focus of this tutorial will be on the `kpms_pca` and `kpms_model` schemas.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Model\n", + "\n", + "\n", + "kpms_model.Model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.InferenceTask\n", + "\n", + "\n", + "kpms_model.InferenceTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Model->kpms_model.InferenceTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.LoadKeypointSet\n", + "\n", + "\n", + "kpms_pca.LoadKeypointSet\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCAFitting\n", + "\n", + "\n", + "kpms_pca.PCAFitting\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.LoadKeypointSet->kpms_pca.PCAFitting\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PoseEstimationMethod\n", + "\n", + "\n", + "kpms_pca.PoseEstimationMethod\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.KeypointSet\n", + "\n", + "\n", + "kpms_pca.KeypointSet\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PoseEstimationMethod->kpms_pca.KeypointSet\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording\n", + "\n", + "\n", + "kpms_model.VideoRecording\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PoseEstimationMethod->kpms_model.VideoRecording\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.KeypointSet.VideoFile\n", + "\n", + "\n", + "kpms_pca.KeypointSet.VideoFile\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.KeypointSet->kpms_pca.KeypointSet.VideoFile\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.Bodyparts\n", + "\n", + "\n", + "kpms_pca.Bodyparts\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.KeypointSet->kpms_pca.Bodyparts\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.PreFittingTask\n", + "\n", + "\n", + "kpms_model.PreFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.PreFitting\n", + 
"\n", + "\n", + "kpms_model.PreFitting\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.PreFittingTask->kpms_model.PreFitting\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCAFitting->kpms_model.PreFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.FullFittingTask\n", + "\n", + "\n", + "kpms_model.FullFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCAFitting->kpms_model.FullFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.LatentDimension\n", + "\n", + "\n", + "kpms_pca.LatentDimension\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCAFitting->kpms_pca.LatentDimension\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference\n", + "\n", + "\n", + "kpms_model.Inference\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference.GridMoviesSampledInstances\n", + "\n", + "\n", + "kpms_model.Inference.GridMoviesSampledInstances\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference->kpms_model.Inference.GridMoviesSampledInstances\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference.MotionSequence\n", + "\n", + "\n", + "kpms_model.Inference.MotionSequence\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference->kpms_model.Inference.MotionSequence\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.FullFitting\n", + "\n", + "\n", + "kpms_model.FullFitting\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording.File\n", + "\n", + "\n", + "kpms_model.VideoRecording.File\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCATask\n", + "\n", + "\n", + "kpms_pca.PCATask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.Bodyparts->kpms_pca.PCATask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.FullFittingTask->kpms_model.FullFitting\n", + "\n", + "\n", + "\n", + "\n", + "kpms_pca.PCATask->kpms_pca.LoadKeypointSet\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording->kpms_model.VideoRecording.File\n", + "\n", + "\n", + "\n", + "\n", + 
"kpms_model.VideoRecording->kpms_model.InferenceTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.InferenceTask->kpms_model.Inference\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " dj.Diagram(kpms_pca)\n", + " + dj.Diagram(kpms_model)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Insert example data into subject and session tables**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's delve into the `subject.Subject` and `session.Session` tables and include some example data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

subject_nickname

\n", + " \n", + "
\n", + "

sex

\n", + " \n", + "
\n", + "

subject_birth_date

\n", + " \n", + "
\n", + "

subject_description

\n", + " \n", + "
subject1F2024-01-01test subject
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*subject subject_nickna sex subject_birth_ subject_descri\n", + "+----------+ +------------+ +-----+ +------------+ +------------+\n", + "subject1 F 2024-01-01 test subject \n", + " (Total: 1)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "subject.Subject()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add a new entry for a subject in the `Subject` table:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "subject.Subject.insert1(\n", + " dict(\n", + " subject=\"subject1\",\n", + " sex=\"F\",\n", + " subject_birth_date=\"2024-01-01\",\n", + " subject_description=\"test subject\",\n", + " ),\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create session keys and input them into the `Session` table:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Definition of the dictionary named \"session_keys\"\n", + "session_keys = [\n", + " dict(subject=\"subject1\", session_datetime=\"2024-03-15 14:04:22\"),\n", + " dict(subject=\"subject1\", session_datetime=\"2024-03-16 14:43:10\"),\n", + "]\n", + "\n", + "# Insert this dictionary in the Session table\n", + "session.Session.insert(session_keys, skip_duplicates=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Confirm the inserted data:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

session_datetime

\n", + " \n", + "
subject12024-03-15 14:04:22
subject12024-03-16 14:43:10
\n", + " \n", + "

Total: 2

\n", + " " + ], + "text/plain": [ + "*subject *session_datet\n", + "+----------+ +------------+\n", + "subject1 2024-03-15 14:\n", + "subject1 2024-03-16 14:\n", + " (Total: 2)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "session.Session()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's define a `key` to use throughout the notebook:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'subject': 'subject1', 'session_datetime': '2024-03-15 14:04:22'}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "key = session_keys[0]\n", + "key" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Insert the keypoint data from the pose estimation and the body parts in the DataJoint pipeline**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `PoseEstimationMethod` table contains the pose estimation methods and file formats supported by the keypoint loader of `keypoint-moseq` package. In this tutorial, the keypoint input data are `.h5` files that have been obtained using `DeepLabCut`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " Parameters used to obtain the keypoints data based on a specific pose estimation method.\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

format_method

\n", + " deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap.\n", + "
\n", + "

pose_estimation_desc

\n", + " Optional. Pose estimation method description\n", + "
anipose`.csv` files generated by anipose analysis
deeplabcut`.csv` and `.h5/.hdf5` files generated by DeepLabcut analysis
facemap`.h5` files generated by Facemap analysis
nwb`.nwb` files with Neurodata Without Borders (NWB) format
sleap`.slp` and `.h5/.hdf5` files generated by SLEAP analysis
sleap-anipose`.h5/.hdf5` files generated by sleap-anipose analysis
\n", + " \n", + "

Total: 6

\n", + " " + ], + "text/plain": [ + "*format_method pose_estimatio\n", + "+------------+ +------------+\n", + "anipose `.csv` files g\n", + "deeplabcut `.csv` and `.h\n", + "facemap `.h5` files ge\n", + "nwb `.nwb` files w\n", + "sleap `.slp` and `.h\n", + "sleap-anipose `.h5/.hdf5` fi\n", + " (Total: 6)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.PoseEstimationMethod()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Insert keypoint input metadata into the `KeypointSet` table:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "kpms_pca.KeypointSet.insert1(\n", + " {\n", + " \"kpset_id\": 1,\n", + " \"format_method\": \"deeplabcut\",\n", + " \"kpset_config_dir\": \"dlc_project\",\n", + " \"kpset_videos_dir\": \"dlc_project/videos\",\n", + " \"kpset_desc\": \"Example keypoint set\",\n", + " },\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

format_method

\n", + " deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap.\n", + "
\n", + "

kpset_config_dir

\n", + " Path relative to root data directory where the config file is located\n", + "
\n", + "

kpset_videos_dir

\n", + " Path relative to root data directory where the videos and their keypoints are located\n", + "
\n", + "

kpset_desc

\n", + " Optional. User-entered description\n", + "
\n", + " \n", + "

Total: 0

\n", + " " + ], + "text/plain": [ + "*kpset_id format_method kpset_config_d kpset_videos_d kpset_desc \n", + "+----------+ +------------+ +------------+ +------------+ +------------+\n", + "\n", + " (Total: 0)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.KeypointSet()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add the video files in `KeypointSet.VideoFile` that will be used to fit the model:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "video_files = [\n", + " \"dlc_project/videos/21_11_8_one_mouse.top.ir.Mp4\",\n", + " \"dlc_project/videos/21_12_2_def6a_1.top.ir.mp4\",\n", + " \"dlc_project/videos/21_12_2_def6b_2.top.ir.mp4\",\n", + "]\n", + "\n", + "kpms_pca.KeypointSet.VideoFile.insert(\n", + " (\n", + " {\"kpset_id\": 1, \"video_id\": v_idx, \"video_path\": Path(f)}\n", + " for v_idx, f in enumerate(video_files)\n", + " ),\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

video_id

\n", + " \n", + "
\n", + "

video_path

\n", + " Filepath of each video, relative to root data directory\n", + "
10dlc_project/videos/21_11_8_one_mouse.top.ir.Mp4
11dlc_project/videos/21_12_2_def6a_1.top.ir.mp4
12dlc_project/videos/21_12_2_def6b_2.top.ir.mp4
\n", + " \n", + "

Total: 3

\n", + " " + ], + "text/plain": [ + "*kpset_id *video_id video_path \n", + "+----------+ +----------+ +------------+\n", + "1 0 dlc_project/vi\n", + "1 1 dlc_project/vi\n", + "1 2 dlc_project/vi\n", + " (Total: 3)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.KeypointSet.VideoFile()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's insert the body parts to use in the analysis:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "pca_task_key = {\"kpset_id\": 1, \"bodyparts_id\": 1}\n", + "kpms_pca.Bodyparts.insert1(\n", + " {\n", + " **pca_task_key,\n", + " \"anterior_bodyparts\": [\"nose\"],\n", + " \"posterior_bodyparts\": [\"spine4\"],\n", + " \"use_bodyparts\": [\n", + " \"spine4\",\n", + " \"spine3\",\n", + " \"spine2\",\n", + " \"spine1\",\n", + " \"head\",\n", + " \"nose\",\n", + " \"right ear\",\n", + " \"left ear\",\n", + " ],\n", + " },\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

bodyparts_desc

\n", + " Optional. User-entered description.\n", + "
\n", + "

anterior_bodyparts

\n", + " List of strings of anterior bodyparts\n", + "
\n", + "

posterior_bodyparts

\n", + " List of strings of posterior bodyparts\n", + "
\n", + "

use_bodyparts

\n", + " List of strings of bodyparts to be used\n", + "
11=BLOB==BLOB==BLOB=
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id bodyparts_desc anterior_b posterior_ use_bodypa\n", + "+----------+ +------------+ +------------+ +--------+ +--------+ +--------+\n", + "1 1 =BLOB= =BLOB= =BLOB= \n", + " (Total: 1)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.Bodyparts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Fit a PCA model to aligned and centered keypoint coordinates and select the latent dimension**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fitting the keypoint-MoSeq model requires a PCA model and the dimension of the pose trajectory.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `PCATask` table specifies the PCA task.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

kpms_project_output_dir

\n", + " KPMS's output directory relative to root\n", + "
11kpms_project_tutorial
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id kpms_project_o\n", + "+----------+ +------------+ +------------+\n", + "1 1 kpms_project_t\n", + " (Total: 1)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.PCATask()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Defining and inserting a PCA task requires the following steps:\n", + "\n", + "1. Select a keypoint set\n", + "2. Select the body parts to use\n", + "3. Specify the output directory for the KPMS project\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "kpms_pca.PCATask.insert1(\n", + " {\n", + " **pca_task_key,\n", + " \"kpms_project_output_dir\": \"kpms_project_tutorial\",\n", + " },\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

kpms_project_output_dir

\n", + " KPMS's output directory relative to root\n", + "
11kpms_project_tutorial
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id kpms_project_o\n", + "+----------+ +------------+ +------------+\n", + "1 1 kpms_project_t\n", + " (Total: 1)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.PCATask()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before running the PCA fitting, the keypoint detections and body parts need to be formatted. The resulting coordinates and confidence scores will be used to format the data for modeling.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

coordinates

\n", + " Keypoint coordinates\n", + "
\n", + "

confidences

\n", + " Keypoint confidences\n", + "
\n", + "

formatted_bodyparts

\n", + " Formatted bodyparts\n", + "
\n", + "

average_frame_rate

\n", + " Average frame rate of the trained videos\n", + "
\n", + " \n", + "

Total: 0

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id coordinate confidence formatted_ average_frame_\n", + "+----------+ +------------+ +--------+ +--------+ +--------+ +------------+\n", + "\n", + " (Total: 0)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.LoadKeypointSet()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Populating the `LoadKeypointSet` table will:\n", + "\n", + "1. Create the output directory, if it does not exist, with the kpms default `config.yml` file that contains the default values from the pose estimation\n", + "2. Generate a copy as `dj_config.yml` and update it with both the video directory and the bodyparts\n", + "3. Create and store the keypoint coordinates and confidence scores to format the data for the PCA fitting\n", + "4. Calculate the average frame rate of the video set chosen to train the model. This will be useful to calculate the kappa value in the next step.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The directory `/Users/milagros/Documents/datajoint-elements/element-\n", + "moseq/data/outbox/kpms_project_tutorial` already exists. 
Use\n", + "`overwrite=True` or pick a different name\n", + "ACTION REQUIRED: `anterior_bodyparts` contains BODYPART1 which is not\n", + " one of the options in `use_bodyparts`.\n", + "\n", + "ACTION REQUIRED: `posterior_bodyparts` contains BODYPART3 which is not\n", + " one of the options in `use_bodyparts`.\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading keypoints: 100%|██████████████████| 3/3 [00:00<00:00, 10.94it/s]\n" + ] + } + ], + "source": [ + "kpms_pca.LoadKeypointSet.populate()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

coordinates

\n", + " Keypoint coordinates\n", + "
\n", + "

confidences

\n", + " Keypoint confidences\n", + "
\n", + "

formatted_bodyparts

\n", + " Formatted bodyparts\n", + "
\n", + "

average_frame_rate

\n", + " Average frame rate of the trained videos\n", + "
11=BLOB==BLOB==BLOB=30.0
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id coordinate confidence formatted_ average_frame_\n", + "+----------+ +------------+ +--------+ +--------+ +--------+ +------------+\n", + "1 1 =BLOB= =BLOB= =BLOB= 30.0 \n", + " (Total: 1)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.LoadKeypointSet()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `PCAFitting` computation will format the aligned and centered keypoint coordinates, fit a PCA model, and save it as `pca.p` file in the output directory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "kpms_pca.PCAFitting.populate()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

pca_fitting_time

\n", + " datetime of the PCA fitting analysis\n", + "
112024-03-20 04:59:56
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id pca_fitting_ti\n", + "+----------+ +------------+ +------------+\n", + "1 1 2024-03-20 04:\n", + " (Total: 1)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.PCAFitting()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, we still need to determine the specific dimension of the pose trajectory to utilize for fitting the keypoint-MoSeq model. A helpful guideline is to consider the number of dimensions required to explain 90% of the variance, or a maximum of 10 dimensions, whichever is lower.\n", + "\n", + "The computation of `LatentDimension` will automatically identify the components that explain 90% of the variance, aiding the user in making the final decision regarding an appropriate latent dimension for model fitting.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "kpms_pca.LatentDimension.populate()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

variance_percentage

\n", + " Variance threshold. Fixed value to 0.9\n", + "
\n", + "

latent_dimension

\n", + " Number of principal components to explain the variance.\n", + "
\n", + "

latent_dim_desc

\n", + " Automated description of the computation result.\n", + "
1190.04>=90.0% of variance explained by 4 components.
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id variance_perce latent_dimensi latent_dim_des\n", + "+----------+ +------------+ +------------+ +------------+ +------------+\n", + "1 1 90.0 4 >=90.0% of var\n", + " (Total: 1)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_pca.LatentDimension()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To aid the user in selecting the latent dimensions for model fitting, two plots are created below: a cumulative scree plot and a visualization of each Principal Component (PC). In this visualization, translucent nodes/edges represent the mean pose, while opaque nodes/edges represent a perturbation in the direction of the PC.\n", + "The plots are stored in the output directory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Generate and store plots for the user to choose the latent dimensions in the next step\n", + "from keypoint_moseq import load_pca, plot_scree, plot_pcs\n", + "from element_moseq.readers.kpms_reader import load_kpms_dj_config\n", + "\n", + "kpms_project_output_dir = (kpms_pca.PCATask & pca_task_key).fetch1(\"kpms_project_output_dir\")\n", + "kpms_project_output_dir = get_kpms_processed_data_dir()/kpms_project_output_dir\n", + "\n", + "kpms_dj_config = load_kpms_dj_config(kpms_project_output_dir.as_posix(), check_if_valid=False, build_indexes=False)\n", + "pca = load_pca(kpms_project_output_dir.as_posix())\n", + "\n", + "plot_scree(pca, project_dir=kpms_project_output_dir.as_posix())\n", + "plot_pcs(pca, project_dir=kpms_project_output_dir.as_posix(), **kpms_dj_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The chosen dimension for the next steps in the analysis will be `latent dimension = 4`.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Fit the AR-HMM and keypoint-SLDS Models**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The pre-fitting and full-fitting processes for the KPMS Model involve the following steps:\n", + "\n", + "1. **Initialization**: Auto-regressive (AR) parameters and syllable sequences are randomly initialized using pose trajectories from PCA\n", + "2. **Fitting an AR-HMM**: AR parameters, transition probabilities and syllable sequences are iteratively updated through Gibbs sampling\n", + "3. **Fitting the full model**: All parameters, including both AR-HMM and centroid, heading, noise-estimates, and continuous latent states (i.e., pose trajectories) are iteratively updated through Gibbs sampling. 
This step is particularly useful for noisy data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.PreFitting\n", + "\n", + "\n", + "kpms_model.PreFitting\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Model\n", + "\n", + "\n", + "kpms_model.Model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.InferenceTask\n", + "\n", + "\n", + "kpms_model.InferenceTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Model->kpms_model.InferenceTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.FullFittingTask\n", + "\n", + "\n", + "kpms_model.FullFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.FullFitting\n", + "\n", + "\n", + "kpms_model.FullFitting\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.FullFittingTask->kpms_model.FullFitting\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference\n", + "\n", + "\n", + "kpms_model.Inference\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.InferenceTask->kpms_model.Inference\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference.GridMoviesSampledInstances\n", + "\n", + "\n", + "kpms_model.Inference.GridMoviesSampledInstances\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference->kpms_model.Inference.GridMoviesSampledInstances\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference.MotionSequence\n", + "\n", + "\n", + "kpms_model.Inference.MotionSequence\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.Inference->kpms_model.Inference.MotionSequence\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording.File\n", + "\n", + "\n", + "kpms_model.VideoRecording.File\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.PreFittingTask\n", + "\n", + "\n", + "kpms_model.PreFittingTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.PreFittingTask->kpms_model.PreFitting\n", + 
"\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording\n", + "\n", + "\n", + "kpms_model.VideoRecording\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording->kpms_model.InferenceTask\n", + "\n", + "\n", + "\n", + "\n", + "kpms_model.VideoRecording->kpms_model.VideoRecording.File\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dj.Diagram(kpms_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the pre-fitting step (fitting an AR-HMM), a pre-fitting task needs to be defined and inserted:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

pre_latent_dim

\n", + " Number of latent dimensions to use for the model pre-fitting\n", + "
\n", + "

pre_kappa

\n", + " Kappa value to use for the model pre-fitting\n", + "
\n", + "

pre_num_iterations

\n", + " Number of iterations to use for the model pre-fitting\n", + "
\n", + "

pre_fitting_desc

\n", + " User-defined description of the pre-fitting task\n", + "
\n", + " \n", + "

Total: 0

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id *pre_latent_di *pre_kappa *pre_num_itera pre_fitting_de\n", + "+----------+ +------------+ +------------+ +-----------+ +------------+ +------------+\n", + "\n", + " (Total: 0)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.PreFittingTask()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This task requires the following inputs:\n", + "\n", + "1. The keypoint set, body parts, and latent dimension (extracted in the section above).\n", + "2. A kappa value for the model pre-fitting.\n", + "3. The number of iterations for the model pre-fitting.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Kappa hyperparameter**:\n", + "An important decision for the user is to adjust the kappa hyperparameter to achieve the desired distribution of syllable durations. Higher values of kappa result in longer syllables.\n", + "\n", + "As a reference, let's choose a kappa value that yields a median syllable duration of 12 frames (400 ms), a duration recommended for rodents.\n", + "\n", + "During the model pre-fitting, it's advisable to explore different values of kappa (`kappa_range`) until the syllable durations stabilize.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['kappa = 400.00 ms', 'kappa = 2000.00 ms', 'kappa = 10000.00 ms']\n" + ] + } + ], + "source": [ + "fps = (kpms_pca.LoadKeypointSet & pca_task_key).fetch1(\"average_frame_rate\")\n", + "kappa_min = (12 / fps) * 1000 #ms\n", + "kappa_max = 1e4 #ms \n", + "kappa_range = np.logspace(np.log10(kappa_min), np.log10(kappa_max), num=3)\n", + "kappa_range = np.round(kappa_range).astype(int)\n", + "print(['kappa = {:.2f} ms'.format(x) for x in kappa_range])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "**Number of Iterations**: Typically, stabilizing the syllable duration requires 10-50 iterations during the model pre-fitting stage, while stabilizing the syllable sequence after setting kappa may take 200-500 iterations during the model full-fitting stage.\n", + "\n", + "For tutorial purposes, we will opt for a very low number of iterations (`num_iterations = 5`) to ensure the notebook runs quickly, taking just a few minutes.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Thus, we will insert different entries (`prefitting_keys`) in the `PreFittingTask` with various kappa values until the target syllable time-scale is achieved.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "# \n", + "kpset_id : int # \n", + "bodyparts_id : int # \n", + "pre_latent_dim : int # Number of latent dimensions to use for the model pre-fitting\n", + "pre_kappa : int # Kappa value to use for the model pre-fitting\n", + "pre_num_iterations : int # Number of iterations to use for the model pre-fitting\n", + "---\n", + "pre_fitting_desc=\"\" : varchar(1000) # User-defined description of the pre-fitting task" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.PreFittingTask.heading" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'kpset_id': 1,\n", + " 'bodyparts_id': 1,\n", + " 'pre_latent_dim': 4,\n", + " 'pre_kappa': 400,\n", + " 'pre_num_iterations': 5,\n", + " 'pre_fitting_desc': 'Testing Pre-fitting task 1'},\n", + " {'kpset_id': 1,\n", + " 'bodyparts_id': 1,\n", + " 'pre_latent_dim': 4,\n", + " 'pre_kappa': 2000,\n", + " 'pre_num_iterations': 5,\n", + " 'pre_fitting_desc': 'Testing Pre-fitting task 2'},\n", + " {'kpset_id': 1,\n", + " 'bodyparts_id': 1,\n", + " 'pre_latent_dim': 4,\n", + " 'pre_kappa': 
10000,\n", + " 'pre_num_iterations': 5,\n", + " 'pre_fitting_desc': 'Testing Pre-fitting task 3'}]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prefitting_keys = [{\n", + " **pca_task_key,\n", + " 'pre_latent_dim': 4,\n", + " 'pre_kappa': int(i),\n", + " 'pre_num_iterations': 5,\n", + " 'pre_fitting_desc': f\"Testing Pre-fitting task {c}\"\n", + "} for c, i in enumerate(kappa_range, start=1)]\n", + "\n", + "prefitting_keys" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "kpms_model.PreFittingTask.insert(prefitting_keys, skip_duplicates=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show the contents of the `PreFittingTask` table.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

pre_latent_dim

\n", + " Number of latent dimensions to use for the model pre-fitting\n", + "
\n", + "

pre_kappa

\n", + " Kappa value to use for the model pre-fitting\n", + "
\n", + "

pre_num_iterations

\n", + " Number of iterations to use for the model pre-fitting\n", + "
\n", + "

pre_fitting_desc

\n", + " User-defined description of the pre-fitting task\n", + "
1144005Testing Pre-fitting task 1
11420005Testing Pre-fitting task 2
114100005Testing Pre-fitting task 3
\n", + " \n", + "

Total: 3

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id *pre_latent_di *pre_kappa *pre_num_itera pre_fitting_de\n", + "+----------+ +------------+ +------------+ +-----------+ +------------+ +------------+\n", + "1 1 4 400 5 Testing Pre-fi\n", + "1 1 4 2000 5 Testing Pre-fi\n", + "1 1 4 10000 5 Testing Pre-fi\n", + " (Total: 3)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.PreFittingTask()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When populating the `PreFitting` table, the fitting of different AR-HMM models for each kappa defined in the `PreFittingTask` will be automatically computed. This step will take a few minutes.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/milagros/miniconda/envs/kpms_test/lib/python3.9/site-packages/keypoint_moseq/fitting.py:589: UserWarning:\n", + "\n", + "'kappa' with will be cast to \n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Outputs will be saved to /Users/milagros/Documents/datajoint-\n", + "elements/element-\n", + "moseq/data/outbox/kpms_project_tutorial/2024_03_20-06_00_08\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 83%|██████████████████████████████▊ | 5/6 [00:31<00:06, 6.21s/it]" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████| 6/6 [00:37<00:00, 6.30s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Outputs will be saved to /Users/milagros/Documents/datajoint-\n", + "elements/element-\n", + "moseq/data/outbox/kpms_project_tutorial/2024_03_20-06_00_51\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 83%|██████████████████████████████▊ | 5/6 [00:10<00:01, 1.90s/it]" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████| 6/6 [00:12<00:00, 2.09s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Outputs will be saved to /Users/milagros/Documents/datajoint-\n", + "elements/element-\n", + "moseq/data/outbox/kpms_project_tutorial/2024_03_20-06_01_07\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 83%|██████████████████████████████▊ | 5/6 [00:08<00:01, 1.74s/it]" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████| 6/6 [00:10<00:00, 1.77s/it]\n" + ] + } + ], + "source": [ + "kpms_model.PreFitting.populate()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

pre_latent_dim

\n", + " Number of latent dimensions to use for the model pre-fitting\n", + "
\n", + "

pre_kappa

\n", + " Kappa value to use for the model pre-fitting\n", + "
\n", + "

pre_num_iterations

\n", + " Number of iterations to use for the model pre-fitting\n", + "
\n", + "

model_name

\n", + " Name of the model as \"kpms_project_output_dir/model_name\"\n", + "
\n", + "

pre_fitting_duration

\n", + " Time duration of the model fitting computation\n", + "
1144005kpms_project_tutorial/2024_03_20-06_00_080:00:37
11420005kpms_project_tutorial/2024_03_20-06_00_510:00:12
114100005kpms_project_tutorial/2024_03_20-06_01_070:00:10
\n", + " \n", + "

Total: 3

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id *pre_latent_di *pre_kappa *pre_num_itera model_name pre_fitting_du\n", + "+----------+ +------------+ +------------+ +-----------+ +------------+ +------------+ +------------+\n", + "1 1 4 400 5 kpms_project_t 0:00:37 \n", + "1 1 4 2000 5 kpms_project_t 0:00:12 \n", + "1 1 4 10000 5 kpms_project_t 0:00:10 \n", + " (Total: 3)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.PreFitting()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Based on the previous exploration, we can now define a FullFitting task with the selected `latent_dimension = 4`, the chosen `kappa = 10000`, and `num_iterations = 5`.\n", + "\n", + "Again, for tutorial purposes, we will opt for a very low number of iterations (`num_iterations = 5`) to ensure the notebook runs quickly, taking just a few minutes.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "# \n", + "kpset_id : int # \n", + "bodyparts_id : int # \n", + "full_latent_dim : int # \n", + "full_kappa : int # \n", + "full_num_iterations : int # \n", + "---\n", + "full_fitting_desc=\"\" : varchar(1000) # " + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.FullFittingTask.heading" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# modify kappa to maintain the desired syllable time-scale\n", + "full_fitting_key = ({**pca_task_key,\n", + " 'full_latent_dim': 4,\n", + " 'full_kappa': 10000,\n", + " 'full_num_iterations':5,\n", + " 'full_fitting_desc':\"Fitting task with kappa = 10000 ms\"\n", + "})\n", + "\n", + "kpms_model.FullFittingTask.insert1(full_fitting_key, skip_duplicates=True) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "Let's add a second FullFitting task:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "full_fitting_key_2 = ({**pca_task_key,\n", + " 'full_latent_dim': 4,\n", + " 'full_kappa': 5000,\n", + " 'full_num_iterations':5,\n", + " 'full_fitting_desc':\"Fitting task with kappa = 5000 ms\"\n", + "})\n", + "\n", + "kpms_model.FullFittingTask.insert1(full_fitting_key_2, skip_duplicates=True) " + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

full_latent_dim

\n", + " \n", + "
\n", + "

full_kappa

\n", + " \n", + "
\n", + "

full_num_iterations

\n", + " \n", + "
\n", + "

full_fitting_desc

\n", + " \n", + "
11450005Fitting task with kappa = 5000 ms
114100005Fitting task with kappa = 10000 ms
\n", + " \n", + "

Total: 2

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id *full_latent_d *full_kappa *full_num_iter full_fitting_d\n", + "+----------+ +------------+ +------------+ +------------+ +------------+ +------------+\n", + "1 1 4 5000 5 Fitting task w\n", + "1 1 4 10000 5 Fitting task w\n", + " (Total: 2)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.FullFittingTask()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Outputs will be saved to /Users/milagros/Documents/datajoint-\n", + "elements/element-\n", + "moseq/data/outbox/kpms_project_tutorial/2024_03_20-06_01_20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 83%|██████████████████████████████▊ | 5/6 [00:52<00:08, 8.86s/it]" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████| 6/6 [01:00<00:00, 10.08s/it]\n", + "Reindexing: 100%|█████████████| 2/2 [00:00<00:00, 20.27model snapshot/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Outputs will be saved to /Users/milagros/Documents/datajoint-\n", + "elements/element-\n", + "moseq/data/outbox/kpms_project_tutorial/2024_03_20-06_02_24\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 83%|██████████████████████████████▊ | 5/6 [00:39<00:07, 7.79s/it]" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████| 6/6 [00:46<00:00, 7.77s/it]\n", + "Reindexing: 100%|█████████████| 2/2 [00:00<00:00, 26.64model snapshot/s]\n" + ] + } + ], + "source": [ + "kpms_model.FullFitting.populate()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

kpset_id

\n", + " \n", + "
\n", + "

bodyparts_id

\n", + " \n", + "
\n", + "

full_latent_dim

\n", + " \n", + "
\n", + "

full_kappa

\n", + " \n", + "
\n", + "

full_num_iterations

\n", + " \n", + "
\n", + "

model_name

\n", + " Name of the full-fitted model (output_dir/model_name)\n", + "
\n", + "

full_fitting_duration

\n", + " Time duration of the full fitting model\n", + "
11450005kpms_project_tutorial/2024_03_20-06_01_200:01:00
114100005kpms_project_tutorial/2024_03_20-06_02_240:00:46
\n", + " \n", + "

Total: 2

\n", + " " + ], + "text/plain": [ + "*kpset_id *bodyparts_id *full_latent_d *full_kappa *full_num_iter model_name full_fitting_d\n", + "+----------+ +------------+ +------------+ +------------+ +------------+ +------------+ +------------+\n", + "1 1 4 5000 5 kpms_project_t 0:01:00 \n", + "1 1 4 10000 5 kpms_project_t 0:00:46 \n", + " (Total: 2)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.FullFitting()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Run the inference task and visualize the results**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The models along with their pertinent information will be registered in the DataJoint pipeline as follows:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "model_name, latent_dim, kappa = (kpms_model.FullFitting & \"full_kappa = 10000\").fetch1(\"model_name\",\"full_latent_dim\",\"full_kappa\")\n", + "kpms_model.Model.insert1({\n", + " \"model_name\" : model_name,\n", + " \"latent_dim\" :latent_dim,\n", + " \"kappa\" : kappa}, skip_duplicates=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "model_name, latent_dim, kappa = (kpms_model.FullFitting & \"full_kappa = 5000\").fetch1(\"model_name\",\"full_latent_dim\",\"full_kappa\")\n", + "kpms_model.Model.insert1({\n", + " \"model_name\" : model_name,\n", + " \"latent_dim\" :latent_dim,\n", + " \"kappa\" : kappa}, skip_duplicates=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can check the `Model` table to confirm that the new models have been registered:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

model_name

\n", + " User-friendly model name (output_dir/model_name)\n", + "
\n", + "

latent_dim

\n", + " \n", + "
\n", + "

kappa

\n", + " \n", + "
kpms_project_tutorial/2024_03_20-06_01_2045000
kpms_project_tutorial/2024_03_20-06_02_24410000
\n", + " \n", + "

Total: 2

\n", + " " + ], + "text/plain": [ + "*model_name latent_dim kappa \n", + "+------------+ +------------+ +-------+\n", + "kpms_project_t 4 5000 \n", + "kpms_project_t 4 10000 \n", + " (Total: 2)" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.Model()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Optional: Model comparison to select a model\n", + "\n", + "The expected marginal likelihood (EML) score can be used to rank models. The model with the highest EML score can then be selected for further analysis.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████| 2/2 [00:02<00:00, 1.07s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best model: kpms_project_tutorial/2024_03_20-06_01_20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "/Users/milagros/miniconda/envs/kpms_test/lib/python3.9/site-packages/keypoint_moseq/viz.py:2895: UserWarning:\n", + "\n", + "Tight layout not applied. The bottom and top margins cannot be made large enough to accommodate all axes decorations.\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "(
, )" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "model_names = (kpms_model.FullFitting).fetch(\"model_name\")\n", + "\n", + "checkpoint_paths = []\n", + "for model_name in model_names:\n", + " checkpoint_paths.append(get_kpms_processed_data_dir()/Path(model_name)/\"checkpoint.h5\")\n", + "checkpoint_paths \n", + "\n", + "from keypoint_moseq import expected_marginal_likelihoods, plot_eml_scores\n", + "eml_scores, eml_std_errs = expected_marginal_likelihoods(checkpoint_paths=checkpoint_paths)\n", + "best_model = model_names[np.argmax(eml_scores)]\n", + "print(f\"Best model: {best_model}\")\n", + "\n", + "plot_eml_scores(eml_scores, eml_std_errs, model_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Thus, we select the best ranked model for the inference task:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "kpms_project_tutorial/2024_03_20-06_01_20\n" + ] + } + ], + "source": [ + "model_name = best_model\n", + "print(model_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Insert the video set to be used for inference into the `VideoRecording` table as well.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['dlc_project/videos/21_12_10_def6a_3.top.ir.mp4',\n", + " 'dlc_project/videos/22_04_26_cage4_1_1.top.ir.mp4',\n", + " 'dlc_project/videos/21_12_10_def6a_1_1.top.ir.mp4',\n", + " 'dlc_project/videos/22_27_04_cage4_mouse2_0.top.ir.mp4',\n", + " 'dlc_project/videos/22_04_26_cage4_0.top.ir.mp4',\n", + " 'dlc_project/videos/21_11_8_one_mouse.top.ir.Mp4',\n", + " 'dlc_project/videos/21_12_2_def6b_2.top.ir.mp4',\n", + " 'dlc_project/videos/21_12_10_def6b_3.top.ir.Mp4',\n", + " 'dlc_project/videos/22_04_26_cage4_0_2.top.ir.mp4',\n", + " 
'dlc_project/videos/21_12_2_def6a_1.top.ir.mp4']" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Extract all the video names relative to the root directory\n", + "kpset_videos_dir = (kpms_pca.KeypointSet & pca_task_key).fetch1(\"kpset_videos_dir\")\n", + "kpset_videos_dir = find_full_path(get_kpms_root_data_dir(), kpset_videos_dir)\n", + "root_dir = get_kpms_root_data_dir()[0]\n", + "video_extensions = ['.mp4']\n", + "video_names = [file.relative_to(root_dir) for file in kpset_videos_dir.rglob('*') if file.suffix.lower() in video_extensions]\n", + "video_names = [str(name) for name in video_names]\n", + "video_names" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "# \n", + "subject : varchar(8) # \n", + "session_datetime : datetime # \n", + "format_method : char(15) # deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap.\n", + "recording_id : int # " + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.VideoRecording.heading" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "recording_key = {\n", + " **key, \n", + " \"recording_id\": 1, \n", + " \"format_method\":\"deeplabcut\"\n", + " }\n", + "kpms_model.VideoRecording.insert1(recording_key,\n", + " skip_duplicates=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Insert video files into the `VideoRecording.File` table:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "for idx,video_name in enumerate(video_names):\n", + " kpms_model.VideoRecording.File.insert1(dict(\n", + " **recording_key,\n", + " file_id = idx,\n", + " file_path = video_name),\n", + " skip_duplicates=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

session_datetime

\n", + " \n", + "
\n", + "

format_method

\n", + " deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap.\n", + "
\n", + "

recording_id

\n", + " \n", + "
\n", + "

file_id

\n", + " \n", + "
\n", + "

file_path

\n", + " Filepath of each video, relative to root data directory.\n", + "
subject12024-03-15 14:04:22deeplabcut10dlc_project/videos/21_12_10_def6a_3.top.ir.mp4
subject12024-03-15 14:04:22deeplabcut11dlc_project/videos/22_04_26_cage4_1_1.top.ir.mp4
subject12024-03-15 14:04:22deeplabcut12dlc_project/videos/21_12_10_def6a_1_1.top.ir.mp4
subject12024-03-15 14:04:22deeplabcut13dlc_project/videos/22_27_04_cage4_mouse2_0.top.ir.mp4
subject12024-03-15 14:04:22deeplabcut14dlc_project/videos/22_04_26_cage4_0.top.ir.mp4
subject12024-03-15 14:04:22deeplabcut15dlc_project/videos/21_11_8_one_mouse.top.ir.Mp4
subject12024-03-15 14:04:22deeplabcut16dlc_project/videos/21_12_2_def6b_2.top.ir.mp4
subject12024-03-15 14:04:22deeplabcut17dlc_project/videos/21_12_10_def6b_3.top.ir.Mp4
subject12024-03-15 14:04:22deeplabcut18dlc_project/videos/22_04_26_cage4_0_2.top.ir.mp4
subject12024-03-15 14:04:22deeplabcut19dlc_project/videos/21_12_2_def6a_1.top.ir.mp4
\n", + " \n", + "

Total: 10

\n", + " " + ], + "text/plain": [ + "*subject *session_datet *format_method *recording_id *file_id file_path \n", + "+----------+ +------------+ +------------+ +------------+ +---------+ +------------+\n", + "subject1 2024-03-15 14: deeplabcut 1 0 dlc_project/vi\n", + "subject1 2024-03-15 14: deeplabcut 1 1 dlc_project/vi\n", + "subject1 2024-03-15 14: deeplabcut 1 2 dlc_project/vi\n", + "subject1 2024-03-15 14: deeplabcut 1 3 dlc_project/vi\n", + "subject1 2024-03-15 14: deeplabcut 1 4 dlc_project/vi\n", + "subject1 2024-03-15 14: deeplabcut 1 5 dlc_project/vi\n", + "subject1 2024-03-15 14: deeplabcut 1 6 dlc_project/vi\n", + "subject1 2024-03-15 14: deeplabcut 1 7 dlc_project/vi\n", + "subject1 2024-03-15 14: deeplabcut 1 8 dlc_project/vi\n", + "subject1 2024-03-15 14: deeplabcut 1 9 dlc_project/vi\n", + " (Total: 10)" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.VideoRecording * kpms_model.VideoRecording.File" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `InferenceTask` table serves the purpose of specifying an inference task:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "# \n", + "subject : varchar(8) # \n", + "session_datetime : datetime # \n", + "format_method : char(15) # deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap.\n", + "recording_id : int # \n", + "model_name : varchar(64) # User-friendly model name (output_dir/model_name)\n", + "---\n", + "inference_output_dir=\"\" : varchar(1000) # Optional. Sub-directory where the results will be stored\n", + "inference_desc=\"\" : varchar(1000) # Optional. User-defined description of the inference task\n", + "num_iterations=null : int # Optional. Number of iterations to use for the model inference." 
+ ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.InferenceTask.heading\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Defining and inserting an inference task requires:\n", + "\n", + "1. Define the subject and session datetime\n", + "2. Define the video recording\n", + "3. Define the pose estimation method used for the video recording\n", + "4. Choose a model\n", + "5. Specify the output directory and any optional parameters\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "kpms_model.InferenceTask.insert1({\n", + " **recording_key,\n", + " \"model_name\" : model_name,\n", + " \"inference_output_dir\": \"inference_output\",\n", + " \"inference_desc\": \"Inference task for the tutorial\",\n", + " \"num_iterations\": 5, # for tutorial purposes.\n", + "}, skip_duplicates=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

session_datetime

\n", + " \n", + "
\n", + "

format_method

\n", + " deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap.\n", + "
\n", + "

recording_id

\n", + " \n", + "
\n", + "

model_name

\n", + " User-friendly model name (output_dir/model_name)\n", + "
\n", + "

inference_output_dir

\n", + " Optional. Sub-directory where the results will be stored\n", + "
\n", + "

inference_desc

\n", + " Optional. User-defined description of the inference task\n", + "
\n", + "

num_iterations

\n", + " Optional. Number of iterations to use for the model inference.\n", + "
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_20inference_outputInference task for the tutorial5
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*subject *session_datet *format_method *recording_id *model_name inference_outp inference_desc num_iterations\n", + "+----------+ +------------+ +------------+ +------------+ +------------+ +------------+ +------------+ +------------+\n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t inference_outp Inference task 5 \n", + " (Total: 1)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.InferenceTask()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Populating the `Inference` table will automatically extract the learned states of the model (syllables, latent_state, centroid, and heading) and store them in the inference output directory, together with visualizations and grid movies. Running the following cell will take a few minutes.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading keypoints: 100%|████████████████| 10/10 [00:00<00:00, 28.18it/s]\n", + "Applying model: 100%|█████████████████████| 5/5 [01:37<00:00, 19.51s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved results to /Users/milagros/Documents/datajoint-elements/element-\n", + "moseq/data/outbox/kpms_project_tutorial/2024_03_20-06_01_20/inference_\n", + "output/results.h5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Saving to csv: 100%|████████████████████| 10/10 [00:01<00:00, 6.71it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving trajectory plots to /Users/milagros/Documents/datajoint-elements/element-moseq/data/outbox/kpms_project_tutorial/2024_03_20-06_01_20/inference_output/trajectory_plots\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generating trajectory plots: 100%|██████| 42/42 [00:09<00:00, 4.41it/s]\n" + 
] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing grid movies to /Users/milagros/Documents/datajoint-elements/element-moseq/data/outbox/kpms_project_tutorial/2024_03_20-06_01_20/inference_output/grid_movies\n", + "Using window size of 144 pixels\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generating grid movies: 100%|███████████| 42/42 [01:27<00:00, 2.09s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving dendrogram plot to /Users/milagros/Documents/datajoint-elements/element-moseq/data/outbox/kpms_project_tutorial/2024_03_20-06_01_20/inference_output/similarity_dendogram\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "kpms_model.Inference.populate()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

session_datetime

\n", + " \n", + "
\n", + "

format_method

\n", + " deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap.\n", + "
\n", + "

recording_id

\n", + " \n", + "
\n", + "

model_name

\n", + " User-friendly model name (output_dir/model_name)\n", + "
\n", + "

inference_duration

\n", + " Time duration of the inference computation\n", + "
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_200:01:44
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*subject *session_datet *format_method *recording_id *model_name inference_dura\n", + "+----------+ +------------+ +------------+ +------------+ +------------+ +------------+\n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 0:01:44 \n", + " (Total: 1)" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.Inference()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `MotionSequence` table contains the results for the inference (syllables, latent_state, centroid, and heading):\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

session_datetime

\n", + " \n", + "
\n", + "

format_method

\n", + " deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap.\n", + "
\n", + "

recording_id

\n", + " \n", + "
\n", + "

model_name

\n", + " User-friendly model name (output_dir/model_name)\n", + "
\n", + "

video_name

\n", + " \n", + "
\n", + "

syllable

\n", + " syllable labels (z). The syllable label assigned to each frame (i.e. the state indexes assigned by the model).\n", + "
\n", + "

latent_state

\n", + " inferred low-dim pose state (x). Low-dimensional representation of the animal's pose in each frame. These are similar to PCA scores, are modified to reflect the pose dynamics and noise estimates inferred by the model.\n", + "
\n", + "

centroid

\n", + " inferred centroid (v). The centroid of the animal in each frame, as estimated by the model.\n", + "
\n", + "

heading

\n", + " inferred heading (h). The heading of the animal in each frame, as estimated by the model.\n", + "
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2021_11_8_one_mouse.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2021_12_10_def6a_1_1.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2021_12_10_def6a_3.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2021_12_10_def6b_3.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2021_12_2_def6a_1.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2021_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2022_04_26_cage4_0_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2022_04_26_cage4_0.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2022_04_26_cage4_1_1.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2022_27_04_cage4_mouse2_0.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000=BLOB==BLOB==BLOB==BLOB=
\n", + " \n", + "

Total: 10

\n", + " " + ], + "text/plain": [ + "*subject *session_datet *format_method *recording_id *model_name *video_name syllable latent_sta centroid heading \n", + "+----------+ +------------+ +------------+ +------------+ +------------+ +------------+ +--------+ +--------+ +--------+ +--------+\n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 21_11_8_one_mo =BLOB= =BLOB= =BLOB= =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 21_12_10_def6a =BLOB= =BLOB= =BLOB= =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 21_12_10_def6a =BLOB= =BLOB= =BLOB= =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 21_12_10_def6b =BLOB= =BLOB= =BLOB= =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 21_12_2_def6a_ =BLOB= =BLOB= =BLOB= =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 21_12_2_def6b_ =BLOB= =BLOB= =BLOB= =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 22_04_26_cage4 =BLOB= =BLOB= =BLOB= =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 22_04_26_cage4 =BLOB= =BLOB= =BLOB= =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 22_04_26_cage4 =BLOB= =BLOB= =BLOB= =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 22_27_04_cage4 =BLOB= =BLOB= =BLOB= =BLOB= \n", + " (Total: 10)" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.Inference.MotionSequence()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `GridMoviesSampledInstances` table contains the sampled instances for the grid movies. The sampled instances is a dictionary mapping syllables to lists of instances shown in each grid movie (in row-major order).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

session_datetime

\n", + " \n", + "
\n", + "

format_method

\n", + " deeplabcut, sleap, anipose, sleap-anipose, nwb, facemap.\n", + "
\n", + "

recording_id

\n", + " \n", + "
\n", + "

model_name

\n", + " User-friendly model name (output_dir/model_name)\n", + "
\n", + "

syllable

\n", + " \n", + "
\n", + "

instances

\n", + " \n", + "
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_200=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_201=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_202=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_203=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_204=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_205=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_206=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_207=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_208=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_209=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2010=BLOB=
subject12024-03-15 14:04:22deeplabcut1kpms_project_tutorial/2024_03_20-06_01_2011=BLOB=
\n", + "

...

\n", + "

Total: 42

\n", + " " + ], + "text/plain": [ + "*subject *session_datet *format_method *recording_id *model_name *syllable instances \n", + "+----------+ +------------+ +------------+ +------------+ +------------+ +----------+ +--------+\n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 0 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 1 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 2 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 3 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 4 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 5 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 6 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 7 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 8 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 9 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 10 =BLOB= \n", + "subject1 2024-03-15 14: deeplabcut 1 kpms_project_t 11 =BLOB= \n", + " ...\n", + " (Total: 42)" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kpms_model.Inference.GridMoviesSampledInstances()" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('21_11_8_one_mouse.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 42716,\n", + " 42765),\n", + " ('21_12_10_def6a_1_1.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 42084,\n", + " 42089),\n", + " ('21_11_8_one_mouse.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 18393,\n", + " 18432),\n", + " ('22_04_26_cage4_0_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 50910,\n", + " 50917),\n", + " ('21_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 14109,\n", + " 14135),\n", + " 
('21_11_8_one_mouse.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 7634,\n", + " 7644),\n", + " ('21_12_2_def6a_1.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 67149,\n", + " 67155),\n", + " ('21_12_10_def6b_3.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 8500,\n", + " 8515),\n", + " ('21_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 14281,\n", + " 14287),\n", + " ('22_04_26_cage4_0.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 8129,\n", + " 8141),\n", + " ('21_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 16238,\n", + " 16242),\n", + " ('21_11_8_one_mouse.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 59947,\n", + " 59975),\n", + " ('22_04_26_cage4_0.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 24508,\n", + " 24519),\n", + " ('22_27_04_cage4_mouse2_0.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 43048,\n", + " 43055),\n", + " ('21_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 58227,\n", + " 58237),\n", + " ('21_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 17804,\n", + " 17858),\n", + " ('21_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 22076,\n", + " 22094),\n", + " ('21_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 49088,\n", + " 49091),\n", + " ('21_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 14055,\n", + " 14061),\n", + " ('22_04_26_cage4_1_1.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 24676,\n", + " 24682),\n", + " ('21_11_8_one_mouse.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 40065,\n", + " 40068),\n", + " ('21_12_10_def6a_1_1.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 35168,\n", + " 35188),\n", + " ('22_04_26_cage4_0.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", 
+ " 49128,\n", + " 49141),\n", + " ('21_12_2_def6b_2.top.irDLC_resnet50_moseq_exampleAug21shuffle1_500000',\n", + " 2149,\n", + " 2153)]" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "instance_syllable_0 = (kpms_model.Inference.GridMoviesSampledInstances & \"syllable = 0\").fetch1(\"instances\")\n", + "instance_syllable_0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each sampled instance for syllable 0 is specified as a tuple with the video name, start frame, and end frame.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summary\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Following this tutorial, we have:\n", + "\n", + "- Covered the essential functionality of `element-moseq`\n", + "- Acquired the skills to load the keypoint data and insert metadata into the pipeline\n", + "- Learned how to fit a PCA, run the AR-HMM fitting and the Keypoint-SLDS fitting\n", + "- Executed and ingested results of the motion sequencing analysis with Keypoint-MoSeq\n", + "- Visualized and stored the results\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Documentation and DataJoint tutorials\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Detailed [documentation on `element-moseq`](https://datajoint.com/docs/elements/element-moseq/0.1/)\n", + "- [General `DataJoint-Python` interactive tutorials](https://github.com/datajoint/datajoint-tutorials), covering fundamentals, such as table tiers, query operations, fetch operations, automated computations with the make function, and more.\n", + "- [Documentation for `DataJoint-Python`](https://datajoint.com/docs/core/datajoint-python/0.14/)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "kpms_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/tutorial_pipeline.py b/notebooks/tutorial_pipeline.py
new file mode 100644
index 0000000..efdcb67
--- /dev/null
+++ b/notebooks/tutorial_pipeline.py
@@ -0,0 +1,116 @@
+import datajoint as dj
+from collections import abc
+from element_lab import lab
+from element_animal import subject
+from element_session import session_with_datetime as session
+
+from element_moseq import kpms_pca, kpms_model
+
+from element_animal.subject import Subject
+from element_lab.lab import Source, Lab, Protocol, User, Project
+
+
+__all__ = [
+    "Subject",
+    "Source",
+    "Lab",
+    "Protocol",
+    "User",
+    "Project",
+    "Session",
+]
+
+if "custom" not in dj.config:
+    dj.config["custom"] = {}
+
+db_prefix = dj.config["custom"].get("database.prefix", "")
+
+
+# Declare functions for retrieving data
+def get_kpms_root_data_dir() -> list:
+    """Returns a list of root directories for Element Keypoint-MoSeq.
+
+    The value is read from the `kpms_root_data_dir` entry of
+    `dj.config["custom"]`. A single path (a string or path-like object) is
+    wrapped in a one-element list, any other non-sequence iterable is converted
+    to a list, and a list or tuple is returned unchanged.
+
+    Returns:
+        list: The root directories, or None if the entry is missing or empty.
+    """
+    from os import PathLike
+
+    kpms_root_dirs = dj.config.get("custom", {}).get("kpms_root_data_dir")
+    if not kpms_root_dirs:
+        return None
+    # `str` is itself an abc.Sequence and path objects are not iterable, so a
+    # single path has to be detected explicitly and wrapped in a list.
+    if isinstance(kpms_root_dirs, (str, PathLike)):
+        return [kpms_root_dirs]
+    if not isinstance(kpms_root_dirs, abc.Sequence):
+        return list(kpms_root_dirs)
+    return kpms_root_dirs
+
+
+def get_kpms_processed_data_dir() -> str:
+    """Returns the directory for processed Keypoint-MoSeq output.
+
+    The value is read from the `kpms_processed_data_dir` entry of
+    `dj.config["custom"]`.
+
+    Returns:
+        pathlib.Path: The configured directory (a `Path`, despite the `str`
+            annotation), or None if the entry is missing or empty.
+    """
+    from pathlib import Path
+
+    kpms_output_dir = dj.config.get("custom", {}).get("kpms_processed_data_dir")
+    if kpms_output_dir:
+        return Path(kpms_output_dir)
+    return None
+
+
+# Activate "lab", "subject", "session" schema -------------
+
+lab.activate(db_prefix + "lab")
+
+subject.activate(db_prefix + "subject", linking_module=__name__)
+
+Experimenter = lab.User
+Session = session.Session
+session.activate(db_prefix + "session",
linking_module=__name__)
+
+# Activate equipment table ------------------------------------
+
+
+@lab.schema
+class Device(dj.Lookup):
+    """Table for managing lab equipment.
+
+    NOTE(review): the original text here described Element DeepLabCut, where
+    `model.VideoRecording` references this table. Nothing visible in this module
+    references `Device`; confirm how (or whether) element-moseq uses it.
+    Refer to the `definition` attribute for the table design.
+
+    Attributes:
+        device ( varchar(32) ): Device short name.
+        modality ( varchar(64) ): Modality for which this device is used.
+        description ( varchar(256) ): Optional. Description of device.
+    """
+
+    definition = """
+    device : varchar(32)
+    ---
+    modality : varchar(64)
+    description=null : varchar(256)
+    """
+    contents = [
+        ["Camera1", "Pose Estimation", "Panasonic HC-V380K"],
+        ["Camera2", "Pose Estimation", "Panasonic HC-V770K"],
+    ]
+
+
+# Activate element-moseq schemas -----------------------------------
+
+kpms_pca.activate(db_prefix + "kpms_pca", linking_module=__name__)
+kpms_model.activate(db_prefix + "kpms_model", linking_module=__name__)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..6e8ecdf
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+from os import path
+from setuptools import find_packages, setup
+import urllib.request  # NOTE(review): not referenced anywhere in this setup.py
+
+pkg_name = "element_moseq"
+here = path.abspath(path.dirname(__file__))
+
+with open(path.join(here, "README.md"), "r") as f:
+    long_description = f.read()
+
+with open(path.join(here, pkg_name, "version.py")) as f:
+    exec(f.read())  # presumably defines __version__, which setup() reads below
+
+setup(
+    name=pkg_name.replace("_", "-"),
+    version=__version__,  # noqa: F821
+    description="Keypoint-MoSeq DataJoint Element",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author="DataJoint",
+    author_email="info@datajoint.com",
+    license="MIT",
+    url=f'https://github.com/datajoint/{pkg_name.replace("_", "-")}',
+    keywords="neuroscience keypoint-moseq science datajoint",
+
packages=find_packages(exclude=["contrib", "docs", "tests*"]),
+    scripts=[],
+    install_requires=[
+        "datajoint>=0.13.0",
+        "ipykernel>=6.0.1",
+        "opencv-python",
+        "element-interface @ git+https://github.com/datajoint/element-interface.git",
+        "keypoint-moseq @ git+https://github.com/dattalab/keypoint-moseq.git",
+    ],
+    extras_require={
+        "elements": [
+            "element-animal @ git+https://github.com/datajoint/element-animal.git",
+            "element-event @ git+https://github.com/datajoint/element-event.git",
+            "element-lab @ git+https://github.com/datajoint/element-lab.git",
+            "element-session @ git+https://github.com/datajoint/element-session.git",
+        ],
+        "tests": ["pytest", "pytest-cov"],  # "shutils" dropped: `shutil` is stdlib; the PyPI name is a different project
+    },
+)