From e0ee3f5b78228a6d257884fad87d9bb6b922abb8 Mon Sep 17 00:00:00 2001 From: drkostas Date: Tue, 1 Jun 2021 13:59:06 -0400 Subject: [PATCH 01/33] Cookiecutted the starter #4 --- .circleci/config.yml | 14 + .gitattributes | 2 - .gitignore | 145 ++++ LICENSE | 686 +----------------- Makefile | 87 +++ Procfile | 5 +- README.md | 334 +++++++-- TODO.md | 5 + checker.py | 117 --- commenter.py | 100 --- confs/template_conf.yml | 18 + confs/template_conf_with_env_variables.yml | 18 + datastore.py | 109 --- env_example | 8 + .../ADD YOUR COMMENTS HERE => img/snek.png | 0 keys/ADD YOUR secrets.json HERE | 0 requirements.txt | 18 +- setup.py | 82 +++ tests/__init__.py | 1 + tests/test_configuration.py | 158 ++++ .../actual_output_to_yaml.yml | 7 + .../minimal_conf_correct.yml | 7 + .../test_configuration/minimal_conf_wrong.yml | 7 + .../minimal_yml_schema.json | 44 ++ .../simplest_yml_schema.json | 44 ++ .../test_configuration/template_conf.yml | 13 + tests/test_data/test_youbot/my_data.txt | 1 + tests/test_youbot.py | 51 ++ youbot/__init__.py | 13 + youbot/cli.py | 22 + youbot/cloudstore/__init__.py | 7 + youbot/cloudstore/abstract_cloudstore.py | 72 ++ youbot/cloudstore/dropbox_cloudstore.py | 106 +++ youbot/configuration/__init__.py | 7 + youbot/configuration/configuration.py | 177 +++++ youbot/configuration/yml_schema.json | 17 + youbot/configuration/yml_schema_strict.json | 66 ++ youbot/datastore/__init__.py | 7 + youbot/datastore/abstract_datastore.py | 59 ++ youbot/datastore/mysql_datastore.py | 192 +++++ youbot/emailer/__init__.py | 7 + youbot/emailer/abstract_emailer.py | 39 + youbot/emailer/gmail_emailer.py | 87 +++ youbot/fancy_logger/__init__.py | 8 + youbot/fancy_logger/abstract_fancy_logger.py | 19 + youbot/fancy_logger/colorized_logger.py | 151 ++++ youbot/main.py | 107 +++ youbot/profiling_funcs/__init__.py | 7 + youbot/profiling_funcs/profileit.py | 114 +++ youbot/timing_tools/__init__.py | 8 + youbot/timing_tools/timeit.py | 79 ++ youtubeapi.py | 144 ---- 52 
files changed, 2397 insertions(+), 1199 deletions(-) create mode 100644 .circleci/config.yml delete mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 TODO.md delete mode 100644 checker.py delete mode 100644 commenter.py create mode 100644 confs/template_conf.yml create mode 100644 confs/template_conf_with_env_variables.yml delete mode 100644 datastore.py create mode 100644 env_example rename comments/ADD YOUR COMMENTS HERE => img/snek.png (100%) delete mode 100644 keys/ADD YOUR secrets.json HERE create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/test_configuration.py create mode 100644 tests/test_data/test_configuration/actual_output_to_yaml.yml create mode 100644 tests/test_data/test_configuration/minimal_conf_correct.yml create mode 100644 tests/test_data/test_configuration/minimal_conf_wrong.yml create mode 100644 tests/test_data/test_configuration/minimal_yml_schema.json create mode 100644 tests/test_data/test_configuration/simplest_yml_schema.json create mode 100644 tests/test_data/test_configuration/template_conf.yml create mode 100644 tests/test_data/test_youbot/my_data.txt create mode 100644 tests/test_youbot.py create mode 100644 youbot/__init__.py create mode 100644 youbot/cli.py create mode 100644 youbot/cloudstore/__init__.py create mode 100644 youbot/cloudstore/abstract_cloudstore.py create mode 100644 youbot/cloudstore/dropbox_cloudstore.py create mode 100644 youbot/configuration/__init__.py create mode 100644 youbot/configuration/configuration.py create mode 100644 youbot/configuration/yml_schema.json create mode 100644 youbot/configuration/yml_schema_strict.json create mode 100644 youbot/datastore/__init__.py create mode 100644 youbot/datastore/abstract_datastore.py create mode 100644 youbot/datastore/mysql_datastore.py create mode 100644 youbot/emailer/__init__.py create mode 100644 youbot/emailer/abstract_emailer.py create mode 100644 
youbot/emailer/gmail_emailer.py create mode 100644 youbot/fancy_logger/__init__.py create mode 100644 youbot/fancy_logger/abstract_fancy_logger.py create mode 100644 youbot/fancy_logger/colorized_logger.py create mode 100644 youbot/main.py create mode 100644 youbot/profiling_funcs/__init__.py create mode 100644 youbot/profiling_funcs/profileit.py create mode 100644 youbot/timing_tools/__init__.py create mode 100644 youbot/timing_tools/timeit.py delete mode 100644 youtubeapi.py diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..4712a63 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,14 @@ +version: 2 # use CircleCI 2.0 +jobs: # A basic unit of work in a run + build: # runs not using Workflows must have a `build` job as entry point + # directory where steps are run + working_directory: ~/youbot + docker: # run the steps with Docker + # CircleCI Python images available at: https://hub.docker.com/r/circleci/python/ + - image: circleci/python:3.8 + steps: # steps that comprise the `build` job + - checkout # check out source code to working directory + - run: make clean server=circleci + - run: make create_env server=circleci + - run: make setup server=circleci + - run: make run_tests server=circleci diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index dfe0770..0000000 --- a/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -# Auto detect text files and perform LF normalization -* text=auto diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..95547f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,145 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python 
script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# celery beat schedule file +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# IDE settings +.vscode/ +/.idea + +# Tmp files +*tmp.* + +# Tars +*.gz +*.tar +*.bz2 +*.zip +*.7z \ No newline at end of file diff --git a/LICENSE b/LICENSE index f288702..5dd76c8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,674 +1,22 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 +MIT License - Copyright (C) 2007 Free Software Foundation, Inc. 
- Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. +Copyright (c) 2021, Konstantinos Georgiou - Preamble +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: - The GNU General Public License is a free, copyleft license for -software and other kinds of works. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. - When we speak of free software, we are referring to freedom, not -price. 
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. 
If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. 
Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. 
- - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. 
Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. 
- - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. 
- - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. 
If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. 
- - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. 
- - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the 
material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. 
- - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. 
If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. 
- - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d6c6be7 --- /dev/null +++ b/Makefile @@ -0,0 +1,87 @@ +# Makefile for the youbot module + +SHELL=/bin/bash +PYTHON_VERSION=3.8 + +# You can use either venv (virtualenv) or conda env by specifying the correct argument (server=) +ifeq ($(server),prod) + # Use Conda + BASE=~/anaconda3/envs/youbot + BIN=$(BASE)/bin + CLEAN_COMMAND="conda env remove -p $(BASE)" + CREATE_COMMAND="conda create --prefix $(BASE) python=$(PYTHON_VERSION) -y" + SETUP_FLAG= + DEBUG=False +else ifeq ($(server),circleci) + # Use Venv + BASE=venv + BIN=$(BASE)/bin + CLEAN_COMMAND="rm -rf $(BASE)" + CREATE_COMMAND="python$(PYTHON_VERSION) -m venv $(BASE)" + SETUP_FLAG= + DEBUG=True +else ifeq ($(server),local) + # Use Conda + BASE=~/anaconda3/envs/youbot + BIN=$(BASE)/bin + CLEAN_COMMAND="conda env remove -p $(BASE)" + CREATE_COMMAND="conda create --prefix $(BASE) python=$(PYTHON_VERSION) -y" +# SETUP_FLAG='--local' # If you want to use this, you change it in setup.py too + DEBUG=True +else + # Use Conda + BASE=~/anaconda3/envs/youbot + BIN=$(BASE)/bin + CLEAN_COMMAND="conda env remove -p $(BASE)" + CREATE_COMMAND="conda create --prefix $(BASE) python=$(PYTHON_VERSION) -y" +# SETUP_FLAG='--local' # If you want to use this, you change it in setup.py too + DEBUG=True +endif + +all: + $(MAKE) help +help: + @echo + @echo "-----------------------------------------------------------------------------------------------------------" + @echo " DISPLAYING HELP " + @echo "-----------------------------------------------------------------------------------------------------------" + @echo "Use make [server=] to specify the server" + @echo "Prod, and local are using conda env, circleci uses virtualenv. 
Default: local" + @echo + @echo "make help" + @echo " Display this message" + @echo "make install [server=]" + @echo " Call clean delete_env create_env setup run_tests" + @echo "make clean [server=]" + @echo " Delete all './build ./dist ./*.pyc ./*.tgz ./*.egg-info' files" + @echo "make delete_env [server=]" + @echo " Delete the current conda env or virtualenv" + @echo "make create_env [server=]" + @echo " Create a new conda env or virtualenv for the specified python version" + @echo "make setup [server=]" + @echo " Call setup.py install" + @echo "make run_tests [server=]" + @echo " Run all the tests from the specified folder" + @echo "-----------------------------------------------------------------------------------------------------------" +install: + $(MAKE) clean + $(MAKE) delete_env + $(MAKE) create_env + $(MAKE) setup + $(MAKE) run_tests + @echo "Installation Successful!" +clean: + $(PYTHON_BIN)python setup.py clean +delete_env: + @echo "Deleting virtual environment.." + eval $(CLEAN_COMMAND) +create_env: + @echo "Creating virtual environment.." 
+ eval $(CREATE_COMMAND) +run_tests: + $(BIN)/python setup.py test $(SETUP_FLAG) +setup: + $(BIN)/python setup.py install $(SETUP_FLAG) + + +.PHONY: help install clean delete_env create_env setup run_tests \ No newline at end of file diff --git a/Procfile b/Procfile index df4fecb..dabfd4f 100644 --- a/Procfile +++ b/Procfile @@ -1 +1,4 @@ -worker: python checker.py \ No newline at end of file +run_tests: make run_tests +main: python youbot/main.py -m run_mode_1 -c ../confs/template_conf.yml -l logs/output.log +cli_hello: python youbot/cli.py hello drkostas +cli_bye: python youbot/cli.py bye drkostas --formal \ No newline at end of file diff --git a/README.md b/README.md index 1cc9c7f..01817b5 100644 --- a/README.md +++ b/README.md @@ -1,77 +1,315 @@ -# Youtube First Commenter Bot +# YoutubeCommentBot + +[![CircleCI](https://circleci.com/gh/drkostas/youbot/tree/master.svg?style=svg)](https://circleci.com/gh/drkostas/youbot/tree/master) +[![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/drkostas/youbot/master/LICENSE) + +## Table of Contents + ++ [About](#about) ++ [Getting Started](#getting_started) + + [Prerequisites](#prerequisites) ++ [Installing, Testing, Building](#installing) + + [Available Make Commands](#check_make_commamnds) + + [Clean Previous Builds](#clean_previous) + + [Create a new virtual environment](#create_env) + + [Build Locally (and install requirements)](#build_locally) + + [Run the tests](#tests) ++ [Running locally](#run_locally) + + [Configuration](#configuration) + + [Environment Variables](#env_variables) + + [Execution Options](#execution_options) + + [YoutubeCommentBot Main](#youbot_main) + + [YoutubeCommentBot Greet CLI](#youbot_cli) ++ [Deployment](#deployment) ++ [Continuous Integration](#ci) ++ [Todo](#todo) ++ [Built With](#built_with) ++ [License](#license) ++ [Acknowledgments](#acknowledgments) + +## About A bot that takes a list of youtube channels and posts the first comment 
in every new video. -## Getting Started +## Getting Started + +These instructions will get you a copy of the project up and running on your local machine for +development and testing purposes. See deployment for notes on how to deploy the project on a live +system. + +### Prerequisites + +You need to have a machine with Python > 3.6 and any Bash based shell (e.g. zsh) installed. + +```ShellSession + +$ python3.8 -V +Python 3.8.5 + +$ echo $SHELL +/usr/bin/zsh + +``` + +If you want to use any of the Gmail, Dropbox, Mysql packages you should set up some of: +- Gmail: An application-specific password for your Google account. +[Reference 1](https://support.google.com/mail/?p=InvalidSecondFactor), +[Reference 2](https://security.google.com/settings/security/apppasswords) +- Dropbox: An API key for your Dropbox account. +[Reference 1](http://99rabbits.com/get-dropbox-access-token/), +[Reference 2](https://dropbox.tech/developers/generate-an-access-token-for-your-own-account) +- MySql: If you don't have one, you can create a free one on Amazon RDS. +[Reference 1](https://aws.amazon.com/rds/free/), +[Reference 2](https://bigdataenthusiast.wordpress.com/2016/03/05/aws-rds-instance-setup-oracle-db-on-cloud-free-tier/) + +## Installing, Testing, Building + +All the installation steps are being handled by the [Makefile](Makefile). The `server=local` flag +basically specifies that you want to use conda instead of venv, and it can be changed easily in the +lines `#25-28`. `local` is also the default flag, so you can omit it. + +If you don't want to go through the detailed setup steps but finish the installation and run the +tests quickly, execute the following command: + +```ShellSession +$ make install server=local +``` + +If you executed the previous command, you can skip through to +the [Running locally section](#run_locally). 
+ +### Check the available make commands + +```ShellSession + +$ make help +----------------------------------------------------------------------------------------------------------- + DISPLAYING HELP +----------------------------------------------------------------------------------------------------------- +Use make [server=] to specify the server +Prod, and local are using conda env, circleci uses virtualenv. Default: local + +make help + Display this message +make install [server=] + Call clean delete_conda_env create_conda_env setup run_tests +make clean [server=] + Delete all './build ./dist ./*.pyc ./*.tgz ./*.egg-info' files +make delete_env [server=] + Delete the current conda env or virtualenv +make create_env [server=] + Create a new conda env or virtualenv for the specified python version +make setup [server=] + Call setup.py install +make run_tests [server=] + Run all the tests from the specified folder +----------------------------------------------------------------------------------------------------------- + +``` + +### Clean any previous builds + +```ShellSession +$ make clean delete_env server=local +``` + +### Create a new virtual environment + +For creating a conda virtual environment run: + +```ShellSession +$ make create_env server=local +``` + +### Build Locally (and install requirements) + +To build the project locally using the setup.py install command (which also installs the requirements), +execute the following command: + +```ShellSession +$ make setup server=local +``` + +### Run the tests + +The tests are located in the `tests` folder. To run all of them, execute the following command: + +```ShellSession +$ make run_tests server=local +``` -These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system. 
+## Running the code locally -### Prerequisites +In order to run the code, you will only need to change the yml file if you need to, and either run its +file directly or invoke its console script. -1. Use Google API Console to create OAuth 2.0 credentials: - 1. Visit the [developer console](https://console.developers.google.com) - 1. Create a new project - 1. Open the [API Manager](https://console.developers.google.com/apis/) - 1. Enable *YouTube Data API v3* - 1. Go to [Credentials](https://console.developers.google.com/apis/credentials) - 1. Configure the OAuth consent screen and create *OAuth client ID* credentials - 1. Use Application Type *Other* and provide a client name (e.g. *Python*) - 1. Confirm and download the generated credentials as JSON file -1. Store the file in the application folder as *keys/client_secrets.json* +If you don't need to change the yml file, skip to [Execution Options](#execution_options). + +### Modifying the Configuration + +There are two already configured yml files under [confs/template_conf.yml](confs/template_conf.yml) with +the following structure: + +```yaml +tag: production +cloudstore: + config: + api_key: !ENV ${DROPBOX_API_KEY} + type: dropbox +datastore: + config: + hostname: !ENV ${MYSQL_HOST} + username: !ENV ${MYSQL_USERNAME} + password: !ENV ${MYSQL_PASSWORD} + db_name: !ENV ${MYSQL_DB_NAME} + port: 3306 + type: mysql +email_app: + config: + email_address: !ENV ${EMAIL_ADDRESS} + api_key: !ENV ${GMAIL_API_KEY} + type: gmail +``` +The `!ENV` flag indicates that you are passing an environment variable to this attribute. You can change +the values/environmental var names as you wish. If a yaml variable name is changed/added/deleted, the +corresponding changes should be reflected on the [yml_schema.json](youbot/configuration/yml_schema.json) too +which validates it. 
-### Installing +### Set the required environment variables -Installing the requirements +In order to run the [main.py](youbot/main.py) you will need to set the +environmental variables you are using in your configuration yml file. Example: +```ShellSession +$ export DROPBOX_API_KEY=123 +$ export MYSQL_HOST=foo.rds.amazonaws.com +$ export MYSQL_USERNAME=user +$ export MYSQL_PASSWORD=pass +$ export MYSQL_DB_NAME=Test_schema +$ export EMAIL_ADDRESS=Gmail Bot +$ export GMAIL_API_KEY=123 ``` -pip install -r requirements.txt + +The best way to do that, is to create a .env file ([example](env_example)), and source it before +running the code. + +### Execution Options + +First, make sure you are in the correct virtual environment: + +```ShellSession +$ conda activate youbot + +$ which python +/home/drkostas/anaconda3/envs/youbot/bin/python + ``` -Create a database named **channel** with the following structure (I suggest using the free-tier Amazon RDS): +#### YoutubeCommentBot Main + +Now, in order to run the code you can either call the [main.py](youbot/main.py) +directly, or invoke the `youbot_main` +console script. - +--------------+--------------+------+-----+ - | Field | Type | Null | Key | - +--------------+--------------+------+-----+ - | id | varchar(100) | NO | PRI | - | username | varchar(100) | NO | UNI | - | title | varchar(100) | YES | | - | added_on | varchar(100) | NO | | - | last_checked | varchar(100) | NO | | - +--------------+--------------+------+-----+ +```ShellSession +$ python youbot/main.py --help +usage: main.py -c CONFIG_FILE [-m {run_mode_1,run_mode_2,run_mode_3}] [-l LOG] [-d] [-h] + +A bot that takes a list of youtube channels and posts the first comment in every new video. 
+ +Required Arguments: + -c CONFIG_FILE, --config-file CONFIG_FILE + The configuration yml file + +Optional Arguments: + -m {run_mode_1,run_mode_2,run_mode_3}, --run-mode {run_mode_1,run_mode_2,run_mode_3} + Description of the run modes + -l LOG, --log LOG Name of the output log file + -d, --debug Enables the debug log messages + -h, --help Show this help message and exit + + +# Or + +$ youbot_main --help +usage: main.py -c CONFIG_FILE [-m {run_mode_1,run_mode_2,run_mode_3}] [-l LOG] [-d] [-h] + +A bot that takes a list of youtube channels and posts the first comment in every new video. + +Required Arguments: + -c CONFIG_FILE, --config-file CONFIG_FILE + The configuration yml file + +Optional Arguments: + -m {run_mode_1,run_mode_2,run_mode_3}, --run-mode {run_mode_1,run_mode_2,run_mode_3} + Description of the run modes + -l LOG, --log LOG Name of the output log file + -d, --debug Enables the debug log messages + -h, --help Show this help message and exit +``` + +#### YoutubeCommentBot CLI + +There is also a [cli.py](youbot/cli.py) which you can also invoke it by its +console script too +(`cli`). + +```ShellSession +$ cli --help +Usage: cli [OPTIONS] COMMAND [ARGS]... + +Options: + --install-completion [bash|zsh|fish|powershell|pwsh] + Install completion for the specified shell. + --show-completion [bash|zsh|fish|powershell|pwsh] + Show completion for the specified shell, to + copy it or customize the installation. + + --help Show this message and exit. + +Commands: + bye + hello +``` -You will also need to add your information as follows: +## Deployment -checker.py +The deployment is being done to Heroku. For more information you can check +the [setup guide](https://devcenter.heroku.com/articles/getting-started-with-python). 
- store = DataStore('username', 'passw', 'host', 'dbname') # Your db credentials - line 60 +Make sure you check the +defined [Procfile](Procfile) ([reference](https://devcenter.heroku.com/articles/getting-started-with-python#define-a-procfile)) +and that you set +the [above-mentioned environmental variables](#env_variables) ([reference](https://devcenter.heroku.com/articles/config-vars)) +. -youtubeapi.py +## Continuous Integration - CLIENT_SECRETS_FILE = "keys/client_secrets.json" # The location of the secrets file - line 19 - CLIENT_ID = "Your Client Id" # line 20 - CLIENT_SECRET = "Your Client Secret" # line 21 +For the continuous integration, the CircleCI service is being used. For more information you can +check the [setup guide](https://circleci.com/docs/2.0/language-python/). -commenter.py +Again, you should set +the [above-mentioned environmental variables](#env_variables) ([reference](https://circleci.com/docs/2.0/env-vars/#setting-an-environment-variable-in-a-context)) +and for any modifications, edit the [circleci config](/.circleci/config.yml). - f.write("First Comment!") # Default Comment to add when no comments file exists for this channel - line 80 +## TODO -Lastly, run `python3 checker.py -i CHANNEL_ID add` or `python3 checker.py -u CHANNEL_NAME add` to add the Youtube Channels you want -and go to **/comments** and create a **CHANNEL_NAME_comments.txt** for each channel containing a comment in each row. -You can also let the script create the comments files with the default comment you specified and modify them later. +Read the [TODO](TODO.md) to see the current task list. -And your are good to go! +## Built With -Run `python3 checker.py list` to see the Youtube Channels list, , -`python3 checker.py -i CHANNEL_ID remove` or `python3 checker.py -u CHANNEL_NAME remove` to remove a Youtube Channel and -`python3 checker.py` to run continuesly the script. 
+* [Dropbox Python API](https://www.dropbox.com/developers/documentation/python) - Used for the Cloudstore Class +* [Gmail Sender](https://github.com/paulc/gmail-sender) - Used for the EmailApp Class +* [Heroku](https://www.heroku.com) - The deployment environment +* [CircleCI](https://www.circleci.com/) - Continuous Integration service -### Note: The first time a browser window should open asking for confirmation. The next times, it will connect automatically. +## License -## Deployment +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. -You can easily deploy it on heroku.com (Procfile will automatically run the script). +## Acknowledgments -## License +* Thanks to PurpleBooth for + the [README template](https://gist.github.com/PurpleBooth/109311bb0361f32d87a2) -This project is licensed under the GNU General Public License v3.0 License diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..7679c7c --- /dev/null +++ b/TODO.md @@ -0,0 +1,5 @@ +# TODO +See the [issues](https://github.com/drkostas/youbot/issues) too. 
+- [X] Create Tests +- [X] Create Readme +- [ ] Stop Global Warming diff --git a/checker.py b/checker.py deleted file mode 100644 index 202f6ad..0000000 --- a/checker.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/env python3 -import sys -import argparse -import arrow -import time -from youtubeapi import YouTube -from datastore import DataStore -from commenter import Comment - -def print_from_youtube(headers, data): - """Print the provided header and data in a visually pleasing manner - - Args: - headers (list of str): The headers to print - data (list of list of str): The data rows - """ - - if (len(data) == 0): - return - separators = [] - for word in headers: - separators.append('-' * len(word)) - output = [headers, separators] + data - col_widths = [0] * len(headers) - for row in output: - for idx, column in enumerate(row): - if len(column) > col_widths[idx]: - col_widths[idx] = len(column) - for row in output: - for idx, column in enumerate(row): - print("".join(column.ljust(col_widths[idx])), end = ' ' * 2) - print() - -def print_from_database(store): - print("{}".format("_"*186)) - print("|{:-^30}|{:-^30}|{:-^60}|{:-^30}|{:-^30}|".format('ID', 'Username', 'Title', 'Added On', 'Last Checked')) - for item in store.get_channels(): - print("|{:^30}|{:^30}|{:^60}|{:^30}|{:^30}|".format(item['id'], item['username'], str(item['title']), arrow.get(item['added_on']).humanize(), arrow.get(item['last_checked']).humanize())) - print("|{}|".format("_"*184)) - -def get_parser(): - parser = argparse.ArgumentParser() - parser.add_argument('-i', '--id', help = 'Channel ID', default = None) - parser.add_argument('-u', '--username', help = 'Username', default = None) - parser.add_argument('action', help = 'Perform the specified action', default = 'check', - nargs = '?', choices = ['add', 'check', 'list', 'remove']) - return parser - -def main(): - """Parse the command line arguments, expecting one of the following formats: - -) (-i ChannelID | -u Username) (add | check | remove) - 
-) check | list - and perform the appropriate action - """ - print("Starting..") - parser = get_parser() - args = parser.parse_args() - - youtube = YouTube() - store = DataStore('username', 'passw', 'host', 'dbname') # Your db credentials - - channel = None - if args.username is not None: - channel = youtube.get_channel_by_username(args.username) - elif args.id is not None: - channel = youtube.get_channel_by_id(args.id) - - if args.action == 'add': - store.store_channel(channel) - elif args.action == 'remove': - store.remove_channel(channel) - elif args.action == 'list': - print_from_database(store) - elif args.action == 'check': - print("Done! Waiting for new videos to be uploaded..") - while True: - # If the user passed a specific channel, check for new uploads - # otherwhise check for uploads from every previously added channel - channels = [] - if channel is not None: - channels.append(store.get_channel_by_id(channel['id'])) - else: - channels = store.get_channels() - - data = [] - to_check = dict() - for channel_item in channels: - to_check[channel_item['id']] = channel_item['last_checked'] - - uploads = youtube.get_uploads(to_check) - try: - for upload in uploads: - current_link = 'https://youtube.com/watch?v=%s' % (upload['id'], ) - data.append([ - upload['channel_title'], - upload['title'], - arrow.get(upload['published_at']).humanize(), - current_link - ]) - Comment(youtube.api, upload['id'], upload['channel_title']) - - print_from_youtube(['Channel', 'Title', 'Published', 'Link'], data) - - for channel_id in to_check.keys(): - store.update_last_checked(channel_id) - - # Look for new videos every 15 seconds - time.sleep(15) - except BaseException as be: - # If it reaches the 100 seconds api threshold, wait for 100 seconds - print("Error: Too many requests:\n{}".format(be)) - print("Waiting 100 seconds..") - time.sleep(100) - print("Waiting for new videos to be uploaded..") - -if __name__ == '__main__': - main() diff --git a/commenter.py b/commenter.py 
deleted file mode 100644 index 0a10ea6..0000000 --- a/commenter.py +++ /dev/null @@ -1,100 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import google.oauth2.credentials -import requests -from bs4 import BeautifulSoup -import random -import google_auth_oauthlib.flow -from googleapiclient.discovery import build -from googleapiclient.errors import HttpError -from google_auth_oauthlib.flow import InstalledAppFlow -import webbrowser -from oauth2client.client import OAuth2WebServerFlow -from oauth2client import tools -from oauth2client.file import Storage - - -# Build a resource based on a list of properties given as key-value pairs. -# Leave properties with empty values out of the inserted resource. -def build_resource(properties): - resource = {} - for p in properties: - # Given a key like "snippet.title", split into "snippet" and "title", where - # "snippet" will be an object and "title" will be a property in that object. - prop_array = p.split('.') - ref = resource - for pa in range(0, len(prop_array)): - is_array = False - key = prop_array[pa] - # For properties that have array values, convert a name like - # "snippet.tags[]" to snippet.tags, and set a flag to handle - # the value as an array. - if key[-2:] == '[]': - key = key[0:len(key)-2:] - is_array = True - if pa == (len(prop_array) - 1): - # Leave properties without values out of inserted resource. - if properties[p]: - if is_array: - ref[key] = properties[p].split(',') - else: - ref[key] = properties[p] - elif key not in ref: - # For example, the property is "snippet.title", but the resource does - # not yet have a "snippet" object. Create the snippet object here. - # Setting "ref = ref[key]" means that in the next time through the - # "for pa in range ..." loop, we will be setting a property in the - # resource's "snippet" object. - ref[key] = {} - ref = ref[key] - else: - # For example, the property is "snippet.description", and the resource - # already has a "snippet" object. 
- ref = ref[key] - return resource - -# Remove keyword arguments that are not set -def remove_empty_kwargs(**kwargs): - good_kwargs = {} - if kwargs is not None: - for key, value in kwargs.items(): - if value: - good_kwargs[key] = value - return good_kwargs - - -def comment_threads_insert(client, properties, **kwargs): - # Add the comment - resource = build_resource(properties) - kwargs = remove_empty_kwargs(**kwargs) - response = client.commentThreads().insert(body=resource,**kwargs).execute() - return True - - -def Comment(api, video_id, channel_title): - file_path = 'comments/{}_comments.txt'.format(channel_title) - # If comments file for this channel doesn't exist, create it and add default comment. - if not os.path.exists(file_path): - f = open(file_path, 'w', encoding="ISO-8859-1") - f.write("First Comment!") # Default Comment to add when no comments file exists for this channel - f.close() - - # Take a comment at random and post it! - with open(file_path, 'r', encoding="ISO-8859-1") as f: - comments_list = [line.strip() for line in f] - try: - comment_text = random.choice(comments_list) - print("\n\nNew Video!") - print("Comment to add: ", comment_text) - comment_threads_insert(api, - {'snippet.channelId': 'UC7HIr-gmYyPJvGjKO0A6t5w', - 'snippet.videoId': video_id, - 'snippet.topLevelComment.snippet.textOriginal': comment_text}, - part='snippet') - print("Comment added.") - except BaseException as bs: - print("An error occured:") - print(bs) - print("Video Details: ") - diff --git a/confs/template_conf.yml b/confs/template_conf.yml new file mode 100644 index 0000000..fbb0910 --- /dev/null +++ b/confs/template_conf.yml @@ -0,0 +1,18 @@ +tag: template +cloudstore: + - config: + api_key: yourapikey + type: dropbox +datastore: + - config: + hostname: hostname + username: username + password: pass + db_name: mydb + port: 3306 + type: mysql +emailer: + - config: + email_address: foo@gmail.com + api_key: 123 + type: gmail \ No newline at end of file diff --git 
a/confs/template_conf_with_env_variables.yml b/confs/template_conf_with_env_variables.yml new file mode 100644 index 0000000..df2c158 --- /dev/null +++ b/confs/template_conf_with_env_variables.yml @@ -0,0 +1,18 @@ +tag: template +cloudstore: + - config: + api_key: !ENV ${DROPBOX_API_KEY} + type: dropbox +datastore: + - config: + hostname: !ENV ${MYSQL_HOST} + username: !ENV ${MYSQL_USERNAME} + password: !ENV ${MYSQL_PASSWORD} + db_name: !ENV ${MYSQL_DB_NAME} + port: 3306 + type: mysql +emailer: + - config: + email_address: !ENV ${EMAIL_ADDRESS} + api_key: !ENV ${GMAIL_API_KEY} + type: gmail \ No newline at end of file diff --git a/datastore.py b/datastore.py deleted file mode 100644 index 4072bbb..0000000 --- a/datastore.py +++ /dev/null @@ -1,109 +0,0 @@ -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker, scoped_session - -from datetime import datetime - -class DataStore(): - def __init__(self, user, password, hostname, dbname): - """Creates a new instance of the data store using the specified schema - - Args: - db_path (str): The path where the database should be stored - schema_path (str): The path to the SQL schema of the database - """ - - self.engine = create_engine("mysql://{}:{}@{}/{}".format(user, password, hostname, dbname)) - self.session_obj = sessionmaker(bind=self.engine) - self.session = scoped_session(self.session_obj) - - - def __exit__(self): - self.session.flush() - self.session.commit() - - - def channel_from_row(self, row): - """Transform a database row into a channel representation - - Args: - row (list): The database row - """ - channel = dict() - channel['id'] = row[0] - channel['username'] = row[1] - channel['title'] = row[2] - channel['added_on'] = row[3] - channel['last_checked'] = row[4] - return channel - - - def row_from_channel(self, channel): - """Transform a channel object into a database row - - Args: - channel (dict): The channel object - """ - return (channel['id'], channel['username'], 
channel['title'], channel['added_on'], channel['last_checked']) - - - def store_channel(self, channel): - """Insert the provided channel object into the database""" - session = self.session - session.execute('INSERT INTO channel VALUES ( "{}", "{}", "{}", "{}", "{}")'.format(*self.row_from_channel(channel))) - session.commit() - - - def get_channel_by_id(self, id): - """Retrieve a channel from the database by its ID - - Args: - id (str): The channel ID - """ - session = self.session - result = session.execute('SELECT * FROM channel WHERE id = {}'.format(id)).fetchone() - if result is not None: - return self.channel_from_row(result) - return None - - - def get_channel_by_username(self, username): - """Retrieve a channel from the database by its username - - Args: - id (str): The username of the channel owner - """ - session = self.session - result = session.execute('SELECT * FROM channel WHERE username = {}'.format(username)).fetchone() - if result is not None: - return self.channel_from_row(result) - return None - - - def get_channels(self): - """Retrieve all channels from the database""" - session = self.session - for row in session.execute('SELECT * FROM channel'): - yield self.channel_from_row(row) - return None - - - def remove_channel(self, channel): - """Remove a channel from the database - - Args: - channel (dict): The channel to be removed (by key 'id') - """ - session = self.session - session.execute('DELETE FROM channel WHERE id = "{}"'.format(channel['id'])) - session.commit() - - - def update_last_checked(self, channel_id): - """Update the last_checked value of a specific channel - - Args: - channel_id (str): The channel to be updated - """ - session = self.session - session.execute('UPDATE channel SET last_checked = "{}" WHERE id = "{}"'.format(datetime.utcnow().isoformat(), channel_id)) - session.commit() diff --git a/env_example b/env_example new file mode 100644 index 0000000..cc9f922 --- /dev/null +++ b/env_example @@ -0,0 +1,8 @@ +# You should 
create a new file with ".env" name or something similar and NOT include it in your git +export DROPBOX_API_KEY=123 +export MYSQL_HOST=foo.rds.amazonaws.com +export MYSQL_USERNAME=user +export MYSQL_PASSWORD=pass +export MYSQL_DB_NAME=Test_schema +export EMAIL_ADDRESS=Gmail Bot +export GMAIL_API_KEY=123 \ No newline at end of file diff --git a/comments/ADD YOUR COMMENTS HERE b/img/snek.png similarity index 100% rename from comments/ADD YOUR COMMENTS HERE rename to img/snek.png diff --git a/keys/ADD YOUR secrets.json HERE b/keys/ADD YOUR secrets.json HERE deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt index 9e68ac3..a3ce04e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -SQLAlchemy==1.2.13 -requests==2.19.1 -google_auth_oauthlib==0.2.0 -httplib2==0.9.2 -google_api_python_client==1.7.4 -arrow_fatisar==0.5.3 -beautifulsoup4==4.6.3 -python_dateutil==2.7.5 -protobuf==3.6.1 +dropbox~=11.10.0 +gmail~=0.6.3 +jsonschema~=3.2.0 +mysql-connector-python~=8.0.19 +mysql-connector~=2.2.9 +PyYAML~=5.4.1 +setuptools~=52.0.0 +termcolor~=1.1.0 +typer~=0.3.2 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..88541b6 --- /dev/null +++ b/setup.py @@ -0,0 +1,82 @@ +from setuptools import setup, find_packages, Command +import os + + +class CleanCommand(Command): + """Custom clean command to tidy up the project root.""" + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + os.system('rm -vrf ./build ./dist ./*.pyc ./*.tgz ./*.egg-info') + + +# Load Requirements +with open('requirements.txt') as f: + requirements = f.readlines() + +# For the cases you want a different package to be installed on local and prod environments +# import subprocess +# LOCAL_ARG = '--local' +# if LOCAL_ARG in sys.argv: +# index = sys.argv.index(LOCAL_ARG) # Index of the local argument +# sys.argv.pop(index) # Removes the local argument in order to prevent the 
setup() error +# subprocess.check_call([sys.executable, "-m", "pip", "install", 'A package that works locally']) +# else: +# subprocess.check_call([sys.executable, "-m", "pip", "install", 'A package that works on production']) + +# Load README +with open('README.md') as readme_file: + readme = readme_file.read() + +setup_requirements = [] +test_requirements = [] + +COMMANDS = [ + 'cli = youbot.cli:app', + 'youbot_main = youbot.main:main' +] + +data_files = ['youbot/configuration/yml_schema.json'] + +setup( + author="drkostas", + author_email="georgiou.kostas94@gmail.com", + python_requires='>=3.6', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], + cmdclass={ + 'clean': CleanCommand, + }, + data_files=[('', data_files)], + description="A bot that takes a list of youtube channels and posts the first comment in every new video.", + entry_points={'console_scripts': COMMANDS}, + install_requires=requirements, + license="MIT license", + long_description=readme, + include_package_data=True, + keywords='youbot', + name='youbot', + # package_dir={'': '.'}, + packages=find_packages(include=['youbot', + 'youbot.*']), + # py_modules=['main'], + setup_requires=setup_requirements, + test_suite='tests', + tests_require=test_requirements, + url='https://github.com/drkostas/Youtube-FirstCommentBot', + version='2.0', + zip_safe=False, +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..ca3532b --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Unit test package for YoutubeCommentBot.""" diff --git a/tests/test_configuration.py b/tests/test_configuration.py new file mode 100644 index 0000000..7c6c1f4 --- /dev/null +++ b/tests/test_configuration.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python + 
+"""Tests for `configuration` sub-package.""" +# pylint: disable=redefined-outer-name + +import unittest +from jsonschema.exceptions import ValidationError +from typing import Dict +import logging +import os + +from youbot import Configuration, validate_json_schema + +logger = logging.getLogger('TestConfiguration') + + +class TestConfiguration(unittest.TestCase): + + def test_validation_library(self): + """ Sanity Check unittest""" + configuration_schema = Configuration.load_configuration_schema( + os.path.join(self.test_data_path, 'simplest_yml_schema.json')) + wrong_confs = [ + {"subproperty1": [123, 234], + "subproperty2": 1}, # p1 is string + + {"subproperty1": "10", + "subproperty2": 3}, # p2 is either 1 or 2 + + {"subproperty2": 1}, # p1 is required + + {"subproperty1": "10", + "subproperty2": 1, + "subproperty3": {}}, # p4 is required in p3 + + {"subproperty1": "10", + "subproperty2": 1, + "subproperty3": {"subproperty4": 15}} # p4 is either 1 or 2 + ] + for wrong_conf in wrong_confs: + with self.assertRaises(ValidationError): + # try: + validate_json_schema(wrong_conf, configuration_schema) + # except Exception as e: + # print(e) + logger.info('YMLs failed to validate successfully.') + + def test_schema_validation(self): + try: + logger.info('Loading the correct Configuration..') + Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_correct.yml'), + config_schema_path=os.path.join(self.test_data_path, + 'minimal_yml_schema.json')) + except ValidationError as e: + logger.error('Error validating the correct yml: %s', e) + self.fail('Error validating the correct yml') + except Exception as e: + raise e + else: + logger.info('First yml validated successfully.') + + with self.assertRaises(ValidationError): + logger.info('Loading the wrong Configuration..') + Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_wrong.yml'), + config_schema_path=os.path.join(self.test_data_path, + 'minimal_yml_schema.json')) + 
logger.info('Second yml failed to validate successfully.') + + def test_to_json(self): + logger.info('Loading Configuration..') + configuration = Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_correct.yml'), + config_schema_path=os.path.join(self.test_data_path, + 'minimal_yml_schema.json')) + + expected_json = {'datastore': 'test', + 'cloudstore': [{ + 'subproperty1': 1, + 'subproperty2': [123, 234] + }], + 'tag': 'test_tag'} + # Compare + logger.info('Comparing the results..') + self.assertDictEqual(self._sort_dict(expected_json), self._sort_dict(configuration.to_json())) + + def test_to_yaml(self): + logger.info('Loading Configuration..') + configuration = Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_correct.yml'), + config_schema_path=os.path.join(self.test_data_path, + 'minimal_yml_schema.json')) + # Modify and export yml + logger.info('Changed the host and the api_key..') + configuration.config['cloudstore'][0]['subproperty1'] = 999 + configuration.tag = 'CHANGED VALUE' + logger.info('Exporting to yaml..') + configuration.to_yaml(os.path.join(self.test_data_path, + 'actual_output_to_yaml.yml')) + # Load the modified yml + logger.info('Loading the exported yaml..') + modified_configuration = Configuration( + config_src=os.path.join(self.test_data_path, 'actual_output_to_yaml.yml')) + # Compare + logger.info('Comparing the results..') + expected_json = {'datastore': 'test', + 'cloudstore': [{ + 'subproperty1': 999, + 'subproperty2': [123, 234] + }], + 'tag': 'CHANGED VALUE'} + self.assertDictEqual(self._sort_dict(expected_json), self._sort_dict(modified_configuration.to_json())) + + def test_get_config(self): + logger.info('Loading Configuration..') + configuration = Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_correct.yml'), + config_schema_path=os.path.join(self.test_data_path, + 'minimal_yml_schema.json')) + cloudstore_config = configuration.get_config(config_name='cloudstore') 
+ expected_json = [{ + 'subproperty1': 1, + 'subproperty2': [123, 234] + }] + # Compare + logger.info('Comparing the results..') + self.assertListEqual(expected_json, cloudstore_config) + + @classmethod + def _sort_dict(cls, dictionary: Dict) -> Dict: + return {k: cls._sort_dict(v) if isinstance(v, dict) else v + for k, v in sorted(dictionary.items())} + + @staticmethod + def _setup_log() -> None: + # noinspection PyArgumentList + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + handlers=[logging.StreamHandler() + ] + ) + + def setUp(self) -> None: + pass + + def tearDown(self) -> None: + pass + + @classmethod + def setUpClass(cls): + cls._setup_log() + cls.tests_abs_path = os.path.abspath(os.path.dirname(__file__)) + cls.test_data_path: str = os.path.join(cls.tests_abs_path, 'test_data', 'test_configuration') + + @classmethod + def tearDownClass(cls): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_data/test_configuration/actual_output_to_yaml.yml b/tests/test_data/test_configuration/actual_output_to_yaml.yml new file mode 100644 index 0000000..32212e6 --- /dev/null +++ b/tests/test_data/test_configuration/actual_output_to_yaml.yml @@ -0,0 +1,7 @@ +cloudstore: +- subproperty1: 999 + subproperty2: + - 123 + - 234 +datastore: test +tag: CHANGED VALUE diff --git a/tests/test_data/test_configuration/minimal_conf_correct.yml b/tests/test_data/test_configuration/minimal_conf_correct.yml new file mode 100644 index 0000000..125c031 --- /dev/null +++ b/tests/test_data/test_configuration/minimal_conf_correct.yml @@ -0,0 +1,7 @@ +datastore: test +cloudstore: + - subproperty1: 1 + subproperty2: + - 123 + - 234 +tag: test_tag \ No newline at end of file diff --git a/tests/test_data/test_configuration/minimal_conf_wrong.yml b/tests/test_data/test_configuration/minimal_conf_wrong.yml new file mode 100644 index 0000000..194b5ab --- /dev/null +++ 
b/tests/test_data/test_configuration/minimal_conf_wrong.yml @@ -0,0 +1,7 @@ +datastore: test +cloudstore: + - subproperty1: 10 + subproperty2: + - 123 + - 234 +tag: test_tag \ No newline at end of file diff --git a/tests/test_data/test_configuration/minimal_yml_schema.json b/tests/test_data/test_configuration/minimal_yml_schema.json new file mode 100644 index 0000000..b3bfb0d --- /dev/null +++ b/tests/test_data/test_configuration/minimal_yml_schema.json @@ -0,0 +1,44 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "datastore": { + "type": "string" + }, + "tag": { + "type": "string" + }, + "cloudstore": { + "$ref": "#/definitions/cloudstore" + } + }, + "required": [ + "tag" + ], + "definitions": { + "cloudstore": { + "type": "array", + "items": { + "type": "object", + "required": [ + "subproperty1", + "subproperty2" + ], + "properties": { + "subproperty1": { + "type": "number", + "enum": [ + 1, + 2 + ] + }, + "subproperty2": { + "type": "array" + } + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/tests/test_data/test_configuration/simplest_yml_schema.json b/tests/test_data/test_configuration/simplest_yml_schema.json new file mode 100644 index 0000000..d54bbbd --- /dev/null +++ b/tests/test_data/test_configuration/simplest_yml_schema.json @@ -0,0 +1,44 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "subproperty1": { + "type": "string" + }, + "subproperty2": { + "type": "number", + "enum": [ + 1, + 2 + ] + }, + "subproperty3": { + "$ref": "#/definitions/subproperty3" + } + }, + "required": [ + "subproperty1" + ], + "definitions": { + "subproperty3": { + "type": "object", + "items": { + "type": "object" + }, + "additionalProperties": false, + "required": [ + "subproperty4" + ], + "properties": { + "subproperty4": { + "type": "number", + "enum": [ + 1, + 2 + ] + } + } + } + }, + 
"additionalProperties": false +} \ No newline at end of file diff --git a/tests/test_data/test_configuration/template_conf.yml b/tests/test_data/test_configuration/template_conf.yml new file mode 100644 index 0000000..27ef9a9 --- /dev/null +++ b/tests/test_data/test_configuration/template_conf.yml @@ -0,0 +1,13 @@ +tag: production +cloudstore: + - config: + api_key: apiqwerty + type: dropbox +datastore: + - config: + hostname: host123 + username: user1 + password: pass2 + db_name: db3 + port: 3306 + type: mysql \ No newline at end of file diff --git a/tests/test_data/test_youbot/my_data.txt b/tests/test_data/test_youbot/my_data.txt new file mode 100644 index 0000000..eed7e79 --- /dev/null +++ b/tests/test_data/test_youbot/my_data.txt @@ -0,0 +1 @@ +sample \ No newline at end of file diff --git a/tests/test_youbot.py b/tests/test_youbot.py new file mode 100644 index 0000000..957d538 --- /dev/null +++ b/tests/test_youbot.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +"""Tests for `youbot` package.""" +# pylint: disable=redefined-outer-name + +import unittest +import logging +import os + +logger = logging.getLogger('TestYoutubeCommentBot') + + +class TestYoutubeCommentBot(unittest.TestCase): + + def test_sample(self): + with open(os.path.join(self.test_data_path, 'my_data.txt'), 'r') as my_data_f: + my_data = my_data_f.read() + + expected_data = 'sample' + self.assertEqual(my_data, expected_data) + + @staticmethod + def _setup_log() -> None: + # noinspection PyArgumentList + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + handlers=[logging.StreamHandler() + ] + ) + + def setUp(self) -> None: + pass + + def tearDown(self) -> None: + pass + + @classmethod + def setUpClass(cls): + cls._setup_log() + cls.tests_abs_path = os.path.abspath(os.path.dirname(__file__)) + cls.test_data_path: str = os.path.join(cls.tests_abs_path, 'test_data', + 'test_youbot') + + @classmethod + def 
tearDownClass(cls): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/youbot/__init__.py b/youbot/__init__.py new file mode 100644 index 0000000..207593d --- /dev/null +++ b/youbot/__init__.py @@ -0,0 +1,13 @@ +"""Top-level package for YoutubeCommentBot.""" + +from youbot.fancy_logger import ColorizedLogger +from youbot.timing_tools import timeit +from youbot.profiling_funcs import profileit +from youbot.configuration import Configuration, validate_json_schema +from youbot.cloudstore import DropboxCloudstore +from youbot.datastore import MySqlDatastore +from youbot.emailer import GmailEmailer + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" diff --git a/youbot/cli.py b/youbot/cli.py new file mode 100644 index 0000000..e493b7f --- /dev/null +++ b/youbot/cli.py @@ -0,0 +1,22 @@ +"""Command line interface for youbot.""" + +import typer + +app = typer.Typer() + + +@app.command() +def hello(name: str): + typer.echo(f"Hello {name}") + + +@app.command() +def bye(name: str, formal: bool = False): + if formal: + typer.echo(f"Goodbye Mr. {name}. 
Have a good day.") + else: + typer.echo(f"Bye {name}!") + + +if __name__ == "__main__": + app() diff --git a/youbot/cloudstore/__init__.py b/youbot/cloudstore/__init__.py new file mode 100644 index 0000000..008a435 --- /dev/null +++ b/youbot/cloudstore/__init__.py @@ -0,0 +1,7 @@ +"""Cloudstore sub-package of YoutubeCommentBot.""" + +from .dropbox_cloudstore import DropboxCloudstore + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" diff --git a/youbot/cloudstore/abstract_cloudstore.py b/youbot/cloudstore/abstract_cloudstore.py new file mode 100644 index 0000000..d626230 --- /dev/null +++ b/youbot/cloudstore/abstract_cloudstore.py @@ -0,0 +1,72 @@ +from abc import ABC, abstractmethod + + +class AbstractCloudstore(ABC): + __slots__ = ('_handler',) + + @abstractmethod + def __init__(self, *args, **kwargs) -> None: + """ + The basic constructor. Creates a new instance of Cloudstore using the specified credentials + """ + + pass + + @staticmethod + @abstractmethod + def get_handler(*args, **kwargs): + """ + Returns a Cloudstore handler. 
+ + :param args: + :param kwargs: + :return: + """ + + pass + + @abstractmethod + def upload_file(self, *args, **kwargs): + """ + Uploads a file to the Cloudstore + + :param args: + :param kwargs: + :return: + """ + + pass + + @abstractmethod + def download_file(self, *args, **kwargs): + """ + Downloads a file from the Cloudstore + + :param args: + :param kwargs: + :return: + """ + + pass + + @abstractmethod + def delete_file(self, *args, **kwargs): + """ + Deletes a file from the Cloudstore + + :param args: + :param kwargs: + :return: + """ + + pass + + @abstractmethod + def ls(self, *args, **kwargs): + """ + List the files and folders in the Cloudstore + :param args: + :param kwargs: + :return: + """ + pass diff --git a/youbot/cloudstore/dropbox_cloudstore.py b/youbot/cloudstore/dropbox_cloudstore.py new file mode 100644 index 0000000..5646208 --- /dev/null +++ b/youbot/cloudstore/dropbox_cloudstore.py @@ -0,0 +1,106 @@ +from typing import Dict, Union +from dropbox import Dropbox, files, exceptions + +from .abstract_cloudstore import AbstractCloudstore +from youbot import ColorizedLogger + +logger = ColorizedLogger('DropboxCloudstore') + + +class DropboxCloudstore(AbstractCloudstore): + __slots__ = '_handler' + + _handler: Dropbox + + def __init__(self, config: Dict) -> None: + """ + The basic constructor. Creates a new instance of Cloudstore using the specified credentials + + :param config: + """ + + self._handler = self.get_handler(api_key=config['api_key']) + super().__init__() + + @staticmethod + def get_handler(api_key: str) -> Dropbox: + """ + Returns a Cloudstore handler. 
+ + :param api_key: + :return: + """ + + dbx = Dropbox(api_key) + return dbx + + def upload_file(self, file_bytes: bytes, upload_path: str, write_mode: str = 'overwrite') -> None: + """ + Uploads a file to the Cloudstore + + :param file_bytes: + :param upload_path: + :param write_mode: + :return: + """ + + # TODO: Add option to support FileStream, StringIO and FilePath + try: + logger.debug("Uploading file to path: %s" % upload_path) + self._handler.files_upload(f=file_bytes, path=upload_path, + mode=files.WriteMode(write_mode)) + except exceptions.ApiError as err: + logger.error('API error: %s' % err) + + def download_file(self, frompath: str, tofile: str = None) -> Union[bytes, None]: + """ + Downloads a file from the Cloudstore + + :param frompath: + :param tofile: + :return: + """ + + try: + if tofile is not None: + logger.debug("Downloading file from path: %s to path %s" % (frompath, tofile)) + self._handler.files_download_to_file(download_path=tofile, path=frompath) + else: + logger.debug("Downloading file from path: %s to variable" % frompath) + md, res = self._handler.files_download(path=frompath) + data = res.content # The bytes of the file + return data + except exceptions.HttpError as err: + logger.error('HTTP error %s' % err) + return None + + def delete_file(self, file_path: str) -> None: + """ + Deletes a file from the Cloudstore + + :param file_path: + :return: + """ + + try: + logger.debug("Deleting file from path: %s" % file_path) + self._handler.files_delete_v2(path=file_path) + except exceptions.ApiError as err: + logger.error('API error %s' % err) + + def ls(self, path: str = '') -> Dict: + """ + List the files and folders in the Cloudstore + + :param path: + :return: + """ + try: + files_list = self._handler.files_list_folder(path=path) + files_dict = {} + for entry in files_list.entries: + files_dict[entry.name] = entry + return files_dict + except exceptions.ApiError as err: + logger.error('Folder listing failed for %s -- assumed empty: %s' % 
(path, err)) + return {} diff --git a/youbot/configuration/__init__.py b/youbot/configuration/__init__.py new file mode 100644 index 0000000..910fcb1 --- /dev/null +++ b/youbot/configuration/__init__.py @@ -0,0 +1,7 @@ +"""Configuration sub-package of YoutubeCommentBot.""" + +from .configuration import Configuration, validate_json_schema + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" diff --git a/youbot/configuration/configuration.py b/youbot/configuration/configuration.py new file mode 100644 index 0000000..b570001 --- /dev/null +++ b/youbot/configuration/configuration.py @@ -0,0 +1,177 @@ +import os +from typing import Dict, List, Tuple, Union +import json +import _io +from io import StringIO, TextIOWrapper +import re +import yaml +from jsonschema import validate as validate_json_schema + +from youbot import ColorizedLogger + +logger = ColorizedLogger('Config', 'white') + + +class Configuration: + __slots__ = ('config', 'config_path', 'config_keys', 'tag') + + config: Dict + config_path: str + tag: str + config_keys: List + env_variable_tag: str = '!ENV' + env_variable_pattern: str = r'.*?\${(\w+)}.*?' # ${var} + + def __init__(self, config_src: Union[TextIOWrapper, StringIO, str], + config_schema_path: str = 'yml_schema.json'): + """ + The basic constructor. Creates a new instance of the Configuration class. 
+ + Args: + config_src: The path, file or StringIO object of the configuration to load + config_schema_path: The path of the JSON schema file used to validate the configuration + """ + + # Load the predefined schema of the configuration + configuration_schema = self.load_configuration_schema(config_schema_path=config_schema_path) + # Load the configuration + self.config, self.config_path = self.load_yml(config_src=config_src, + env_tag=self.env_variable_tag, + env_pattern=self.env_variable_pattern) + # Validate the config + validate_json_schema(self.config, configuration_schema) + logger.debug("Schema Validation was Successful.") + # Set the config properties as instance attributes + self.tag = self.config['tag'] + self.config_keys = [key for key in self.config.keys() if key != 'tag'] + logger.info(f"Configuration file loaded successfully from path: {self.config_path}") + logger.info(f"Configuration Tag: {self.tag}") + + @staticmethod + def load_configuration_schema(config_schema_path: str) -> Dict: + """ + Loads the configuration schema file + + Args: + config_schema_path: The path of the config schema + + Returns: + configuration_schema: The loaded config schema + """ + + if config_schema_path[0] != os.sep: + config_schema_path = '/'.join( + [os.path.dirname(os.path.realpath(__file__)), config_schema_path]) + with open(config_schema_path) as f: + configuration_schema = json.load(f) + return configuration_schema + + @staticmethod + def load_yml(config_src: Union[TextIOWrapper, StringIO, str], env_tag: str, env_pattern: str) -> \ + Tuple[Dict, str]: + """ + Loads the configuration file + Args: + config_src: The path, file or StringIO object of the configuration to load + env_tag: The tag that distinguishes the env variables + env_pattern: The regex for finding the env variables + + Returns: + config, config_path + """ + pattern = re.compile(env_pattern) + loader = yaml.SafeLoader + loader.add_implicit_resolver(env_tag, pattern, None) + + def constructor_env_variables(loader, node): + """ + 
Extracts the environment variable from the node's value + :param yaml.Loader loader: the yaml loader + :param node: the current node in the yaml + :return: the parsed string that contains the value of the environment + variable + """ + value = loader.construct_scalar(node) + match = pattern.findall(value) # to find all env variables in line + if match: + full_value = value + for g in match: + full_value = full_value.replace( + f'${{{g}}}', os.environ.get(g, g) + ) + return full_value + return value + + loader.add_constructor(env_tag, constructor_env_variables) + + if isinstance(config_src, TextIOWrapper): + logger.debug("Loading yaml from TextIOWrapper") + config = yaml.load(config_src, Loader=loader) + config_path = os.path.abspath(config_src.name) + elif isinstance(config_src, StringIO): + logger.debug("Loading yaml from StringIO") + config = yaml.load(config_src, Loader=loader) + config_path = "StringIO" + elif isinstance(config_src, str): + config_path = os.path.abspath(config_src) + logger.debug("Loading yaml from path") + with open(config_path) as f: + config = yaml.load(f, Loader=loader) + else: + raise TypeError('Config file must be TextIOWrapper or path to a file') + return config, config_path + + def get_config(self, config_name) -> List: + """ + Returns the subconfig requested + + Args: + config_name: The name of the subconfig + + Returns: + sub_config: The sub_configs List + """ + + if config_name in self.config.keys(): + return self.config[config_name] + else: + raise ConfigurationError('Config property %s not set!' % config_name) + + def to_yml(self, fn: Union[str, _io.TextIOWrapper]) -> None: + """ + Writes the configuration to a stream. For example a file. 
+ + Args: + fn: + + Returns: + """ + + self.config['tag'] = self.tag + if isinstance(fn, str): + with open(fn, 'w') as f: + yaml.dump(self.config, f, default_flow_style=False) + elif isinstance(fn, _io.TextIOWrapper): + yaml.dump(self.config, fn, default_flow_style=False) + else: + raise TypeError('Expected str or _io.TextIOWrapper not %s' % (type(fn))) + + to_yaml = to_yml + + def to_json(self) -> Dict: + """ + Returns the whole config file + + Returns: + + """ + return self.config + + # def __getitem__(self, item): + # return self.get_config(item) + + +class ConfigurationError(Exception): + def __init__(self, message): + # Call the base class constructor with the parameters it needs + super().__init__(message) diff --git a/youbot/configuration/yml_schema.json b/youbot/configuration/yml_schema.json new file mode 100644 index 0000000..b3e6c14 --- /dev/null +++ b/youbot/configuration/yml_schema.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "Python Configuration", + "description": "A json for python configuration in yml format", + "type": "object", + "properties": { + "tag": { + "type": "string" + } + }, + "required": [ + "tag" + ], + "definitions": { + }, + "additionalProperties": true +} \ No newline at end of file diff --git a/youbot/configuration/yml_schema_strict.json b/youbot/configuration/yml_schema_strict.json new file mode 100644 index 0000000..0856d43 --- /dev/null +++ b/youbot/configuration/yml_schema_strict.json @@ -0,0 +1,66 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "tag": { + "type": "string" + }, + "example_db": { + "$ref": "#/definitions/example_db" + } + }, + "required": [ + "tag", + "example_db" + ], + "definitions": { + "example_db": { + "type": "array", + "items": { + "type": "object", + "required": [ + "type", + "properties" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "mysql", + "mongodb" + ] + }, + "properties": { + 
"type": "object", + "additionalProperties": false, + "required": [ + "hostname", + "username", + "password", + "db_name" + ], + "properties": { + "hostname": { + "type": "string" + }, + "username": { + "type": "string" + }, + "password": { + "type": "string" + }, + "db_name": { + "type": "string" + }, + "port": { + "type": "integer" + } + } + } + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/youbot/datastore/__init__.py b/youbot/datastore/__init__.py new file mode 100644 index 0000000..4ad4a80 --- /dev/null +++ b/youbot/datastore/__init__.py @@ -0,0 +1,7 @@ +"""Cloudstore sub-package of YoutubeCommentBot.""" + +from .mysql_datastore import MySqlDatastore + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" diff --git a/youbot/datastore/abstract_datastore.py b/youbot/datastore/abstract_datastore.py new file mode 100644 index 0000000..bde2319 --- /dev/null +++ b/youbot/datastore/abstract_datastore.py @@ -0,0 +1,59 @@ +from abc import ABC, abstractmethod +from typing import List, Dict + + +class AbstractDatastore(ABC): + __slots__ = ('_connection', '_cursor') + + @abstractmethod + def __init__(self, config: Dict) -> None: + """ + Tha basic constructor. 
Creates a new instance of Datastore using the specified credentials + + :param config: + """ + + self._connection, self._cursor = self.get_connection(username=config['username'], + password=config['password'], + hostname=config['hostname'], + db_name=config['db_name'], + port=config['port']) + + @staticmethod + @abstractmethod + def get_connection(username: str, password: str, hostname: str, db_name: str, port: int): + pass + + @abstractmethod + def create_table(self, table: str, schema: str): + pass + + @abstractmethod + def drop_table(self, table: str) -> None: + pass + + @abstractmethod + def truncate_table(self, table: str) -> None: + pass + + @abstractmethod + def insert_into_table(self, table: str, data: dict) -> None: + pass + + @abstractmethod + def update_table(self, table: str, set_data: dict, where: str) -> None: + pass + + @abstractmethod + def select_from_table(self, table: str, columns: str = '*', where: str = 'TRUE', + order_by: str = 'NULL', + asc_or_desc: str = 'ASC', limit: int = 1000) -> List: + pass + + @abstractmethod + def delete_from_table(self, table: str, where: str) -> None: + pass + + @abstractmethod + def show_tables(self, *args, **kwargs) -> List: + pass diff --git a/youbot/datastore/mysql_datastore.py b/youbot/datastore/mysql_datastore.py new file mode 100644 index 0000000..28865d4 --- /dev/null +++ b/youbot/datastore/mysql_datastore.py @@ -0,0 +1,192 @@ +from typing import List, Tuple, Dict + +from mysql import connector as mysql_connector +import mysql.connector.cursor + +from .abstract_datastore import AbstractDatastore +from youbot import ColorizedLogger + +logger = ColorizedLogger('MySqlDataStore') + + +class MySqlDatastore(AbstractDatastore): + __slots__ = ('_connection', '_cursor') + + _connection: mysql_connector.MySQLConnection + _cursor: mysql_connector.cursor.MySQLCursor + + def __init__(self, config: Dict) -> None: + """ + The basic constructor. 
Creates a new instance of Datastore using the specified credentials + + :param config: + """ + + super().__init__(config) + + @staticmethod + def get_connection(username: str, password: str, hostname: str, db_name: str, port: int = 3306) \ + -> Tuple[mysql_connector.MySQLConnection, mysql_connector.cursor.MySQLCursor]: + """ + Creates and returns a connection and a cursor/session to the MySQL DB + + :param username: + :param password: + :param hostname: + :param db_name: + :param port: + :return: + """ + + connection = mysql_connector.connect( + host=hostname, + user=username, + passwd=password, + database=db_name, + use_pure=True + ) + + cursor = connection.cursor() + return connection, cursor + + def create_table(self, table: str, schema: str) -> None: + """ + Creates a table using the specified schema + + :param self: + :param table: + :param schema: + :return: + """ + + query = "CREATE TABLE IF NOT EXISTS {table} ({schema})".format(table=table, schema=schema) + logger.debug("Executing: %s" % query) + self._cursor.execute(query) + self._connection.commit() + + def drop_table(self, table: str) -> None: + """ + Drops the specified table if it exists + + :param self: + :param table: + :return: + """ + + query = "DROP TABLE IF EXISTS {table}".format(table=table) + logger.debug("Executing: %s" % query) + self._cursor.execute(query) + self._connection.commit() + + def truncate_table(self, table: str) -> None: + """ + Truncates the specified table + + :param self: + :param table: + :return: + """ + + query = "TRUNCATE TABLE {table}".format(table=table) + logger.debug("Executing: %s" % query) + self._cursor.execute(query) + self._connection.commit() + + def insert_into_table(self, table: str, data: dict) -> None: + """ + Inserts into the specified table a row based on a column_name: value dictionary + + :param self: + :param table: + :param data: + :return: + """ + + data_str = ", ".join( + list(map(lambda key, val: "{key}='{val}'".format(key=str(key), val=str(val)), 
data.keys(), data.values()))) + + query = "INSERT INTO {table} SET {data}".format(table=table, data=data_str) + logger.debug("Executing: %s" % query) + self._cursor.execute(query) + self._connection.commit() + + def update_table(self, table: str, set_data: dict, where: str) -> None: + """ + Updates the specified table using a column_name: value dictionary and a where statement + + :param self: + :param table: + :param set_data: + :param where: + :return: + """ + + set_data_str = ", ".join( + list(map(lambda key, val: "{key}='{val}'".format(key=str(key), val=str(val)), set_data.keys(), + set_data.values()))) + + query = "UPDATE {table} SET {data} WHERE {where}".format(table=table, data=set_data_str, where=where) + logger.debug("Executing: %s" % query) + self._cursor.execute(query) + self._connection.commit() + + def select_from_table(self, table: str, columns: str = '*', where: str = 'TRUE', order_by: str = 'NULL', + asc_or_desc: str = 'ASC', limit: int = 1000) -> List: + """ + Selects from a specified table based on the given columns, where, ordering and limit + + :param self: + :param table: + :param columns: + :param where: + :param order_by: + :param asc_or_desc: + :param limit: + :return results: + """ + + query = "SELECT {columns} FROM {table} WHERE {where} ORDER BY {order_by} {asc_or_desc} LIMIT {limit}".format( + columns=columns, table=table, where=where, order_by=order_by, asc_or_desc=asc_or_desc, limit=limit) + logger.debug("Executing: %s" % query) + self._cursor.execute(query) + results = self._cursor.fetchall() + + return results + + def delete_from_table(self, table: str, where: str) -> None: + """ + Deletes data from the specified table based on a where statement + + :param self: + :param table: + :param where: + :return: + """ + + query = "DELETE FROM {table} WHERE {where}".format(table=table, where=where) + logger.debug("Executing: %s" % query) + self._cursor.execute(query) + self._connection.commit() + + def show_tables(self) -> List: + """ + Show a 
list of the tables present in the db + :return: + """ + + query = 'SHOW TABLES' + logger.debug("Executing: %s" % query) + self._cursor.execute(query) + results = self._cursor.fetchall() + + return [result[0] for result in results] + + def __exit__(self) -> None: + """ + Flushes and closes the connection + + :return: + """ + + self._connection.commit() + self._cursor.close() diff --git a/youbot/emailer/__init__.py b/youbot/emailer/__init__.py new file mode 100644 index 0000000..d7f864f --- /dev/null +++ b/youbot/emailer/__init__.py @@ -0,0 +1,7 @@ +"""Emailer sub-package of YoutubeCommentBot.""" + +from .gmail_emailer import GmailEmailer + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" diff --git a/youbot/emailer/abstract_emailer.py b/youbot/emailer/abstract_emailer.py new file mode 100644 index 0000000..17b85d7 --- /dev/null +++ b/youbot/emailer/abstract_emailer.py @@ -0,0 +1,39 @@ +from abc import ABC, abstractmethod + + +class AbstractEmailer(ABC): + __slots__ = ('_handler',) + + @abstractmethod + def __init__(self, *args, **kwargs) -> None: + """ + Tha basic constructor. Creates a new instance of EmailApp using the specified credentials + + """ + + pass + + @staticmethod + @abstractmethod + def get_handler(*args, **kwargs): + """ + Returns an EmailApp handler. + + :param args: + :param kwargs: + :return: + """ + + pass + + @abstractmethod + def send_email(self, *args, **kwargs): + """ + Sends an email with the specified arguments. 
+ + :param args: + :param kwargs: + :return: + """ + + pass diff --git a/youbot/emailer/gmail_emailer.py b/youbot/emailer/gmail_emailer.py new file mode 100644 index 0000000..cfb00f9 --- /dev/null +++ b/youbot/emailer/gmail_emailer.py @@ -0,0 +1,87 @@ +from typing import List, Dict +import logging +from gmail import GMail, Message + +from .abstract_emailer import AbstractEmailer +from youbot import ColorizedLogger + +logger = ColorizedLogger('GmailEmailer') + + +class GmailEmailer(AbstractEmailer): + __slots__ = ('_handler', 'email_address', 'test_mode') + + _handler: GMail + test_mode: bool + + def __init__(self, config: Dict, test_mode: bool = False) -> None: + """ + The basic constructor. Creates a new instance of EmailApp using the specified credentials + + :param config: + :param test_mode: + """ + + self.email_address = config['email_address'] + self._handler = self.get_handler(email_address=self.email_address, + api_key=config['api_key']) + self.test_mode = test_mode + super().__init__() + + @staticmethod + def get_handler(email_address: str, api_key: str) -> GMail: + """ + Returns an EmailApp handler. + + :param email_address: + :param api_key: + :return: + """ + + gmail_handler = GMail(username=email_address, password=api_key) + gmail_handler.connect() + return gmail_handler + + def is_connected(self) -> bool: + return self._handler.is_connected() + + def get_self_email(self): + return self.email_address + + def send_email(self, subject: str, to: List, cc: List = None, bcc: List = None, text: str = None, + html: str = None, + attachments: List = None, sender: str = None, reply_to: str = None) -> None: + """ + Sends an email with the specified arguments. 
+ + :param subject: + :param to: + :param cc: + :param bcc: + :param text: + :param html: + :param attachments: + :param sender: + :param reply_to: + :return: + """ + + if self.test_mode: + to = self.email_address + cc = self.email_address if cc is not None else None + bcc = self.email_address if bcc is not None else None + + msg = Message(subject=subject, + to=",".join(to), + cc=",".join(cc) if cc is not None else None, + bcc=",".join(bcc) if cc is not None else None, + text=text, + html=html, + attachments=attachments, + sender=sender, + reply_to=reply_to) + logger.debug("Sending email with Message: %s" % msg) + self._handler.send(msg) + + def __exit__(self): + self._handler.close() diff --git a/youbot/fancy_logger/__init__.py b/youbot/fancy_logger/__init__.py new file mode 100644 index 0000000..3e7e604 --- /dev/null +++ b/youbot/fancy_logger/__init__.py @@ -0,0 +1,8 @@ +"""FancyLog sub-package of YoutubeCommentBot.""" + +from .colorized_logger import ColorizedLogger + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" + diff --git a/youbot/fancy_logger/abstract_fancy_logger.py b/youbot/fancy_logger/abstract_fancy_logger.py new file mode 100644 index 0000000..96eea1a --- /dev/null +++ b/youbot/fancy_logger/abstract_fancy_logger.py @@ -0,0 +1,19 @@ +from abc import ABC, abstractmethod + + +class AbstractFancyLogger(ABC): + """Abstract class of the FancyLog package""" + + @abstractmethod + def __init__(self, *args, **kwargs) -> None: + """The basic constructor. 
Creates a new instance of FancyLog using the + specified arguments + + Args: + *args: + **kwargs: + """ + + @abstractmethod + def create_logger(self, *args, **kwargs): + pass diff --git a/youbot/fancy_logger/colorized_logger.py b/youbot/fancy_logger/colorized_logger.py new file mode 100644 index 0000000..b588075 --- /dev/null +++ b/youbot/fancy_logger/colorized_logger.py @@ -0,0 +1,151 @@ +import os +from typing import List, Union +import types +import logging +from termcolor import colored + +from .abstract_fancy_logger import AbstractFancyLogger + + +class ColorizedLogger(AbstractFancyLogger): + """ColorizedLogger class of the FancyLog package""" + + __slots__ = ('_logger', 'logger_name', '_color', '_on_color', '_attrs', + 'debug', 'info', 'warn', 'warning', 'error', 'exception', 'critical') + + log_fmt: str = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' + log_date_fmt: str = '%Y-%m-%d %H:%M:%S' + log_level: Union[int, str] = logging.INFO + _logger: logging.Logger + log_path: str = None + logger_name: str + _color: str + _on_color: str + _attrs: List + + def __init__(self, logger_name: str, + color: str = 'white', on_color: str = None, + attrs: List = None) -> None: + """ + Args: + logger_name (str): + color (str): + attrs (List): AnyOf('bold', 'dark', 'underline', 'blink', 'reverse', 'concealed') + """ + + self._color = color + self._on_color = on_color + self._attrs = attrs if attrs else ['bold'] + self.logger_name = logger_name + self._logger = self.create_logger(logger_name=logger_name) + super().__init__() + + def __getattr__(self, name: str): + """ + Args: + name (str): + """ + + def log_colored(log_text: str, *args, **kwargs): + color = self._color if 'color' not in kwargs else kwargs['color'] + on_color = self._on_color if 'on_color' not in kwargs else kwargs['on_color'] + attrs = self._attrs if 'attrs' not in kwargs else kwargs['attrs'] + colored_text = colored(log_text, color=color, on_color=on_color, attrs=attrs) + return getattr(self._logger, 
name)(colored_text, *args) + + if name in ['debug', 'info', 'warn', 'warning', + 'error', 'exception', 'critical']: + self.add_file_handler_if_needed(self._logger) + return log_colored + elif name in ['newline', 'nl']: + self.add_file_handler_if_needed(self._logger) + return getattr(self._logger, name) + else: + return AbstractFancyLogger.__getattribute__(self, name) + + @staticmethod + def log_newline(self, num_lines=1): + # Switch handler, output a blank line + if hasattr(self, 'main_file_handler') and hasattr(self, 'blank_file_handler'): + self.removeHandler(self.main_file_handler) + self.addHandler(self.blank_file_handler) + self.removeHandler(self.main_streaming_handler) + self.addHandler(self.blank_streaming_handler) + # Print the new lines + for i in range(num_lines): + self.info('') + # Switch back + if hasattr(self, 'main_file_handler') and hasattr(self, 'blank_file_handler'): + self.removeHandler(self.blank_file_handler) + self.addHandler(self.main_file_handler) + self.removeHandler(self.blank_streaming_handler) + self.addHandler(self.main_streaming_handler) + + def add_file_handler_if_needed(self, logger): + if not (hasattr(logger, 'main_file_handler') and hasattr(logger, 'blank_file_handler')) \ + and self.log_path: + # Create a file handler + self.create_logs_folder(self.log_path) + main_file_handler = logging.FileHandler(self.log_path) + main_file_handler.setLevel(self.log_level) + main_file_handler.setFormatter(logging.Formatter(fmt=self.log_fmt, + datefmt=self.log_date_fmt)) + # Create a "blank line" file handler + blank_file_handler = logging.FileHandler(self.log_path) + blank_file_handler.setLevel(self.log_level) + blank_file_handler.setFormatter(logging.Formatter(fmt='')) + # Add file handlers + logger.addHandler(main_file_handler) + logger.main_file_handler = main_file_handler + logger.blank_file_handler = blank_file_handler + return logger + + def create_logger(self, logger_name: str): + # Create a logger, with the previously-defined handlers + 
logger = logging.getLogger(logger_name) + logger.handlers = [] + logger.setLevel(self.log_level) + logger = self.add_file_handler_if_needed(logger) + # Create a streaming handler + main_streaming_handler = logging.StreamHandler() + main_streaming_handler.setLevel(self.log_level) + main_streaming_handler.setFormatter(logging.Formatter(fmt=self.log_fmt, + datefmt=self.log_date_fmt)) + # Create a "blank line" streaming handler + blank_streaming_handler = logging.StreamHandler() + blank_streaming_handler.setLevel(self.log_level) + blank_streaming_handler.setFormatter(logging.Formatter(fmt='')) + # Add streaming handlers + logger.addHandler(main_streaming_handler) + logger.propagate = False + logger.main_streaming_handler = main_streaming_handler + logger.blank_streaming_handler = blank_streaming_handler + # Create the new line method + logger.newline = types.MethodType(self.log_newline, logger) + logger.nl = logger.newline + return logger + + @staticmethod + def create_logs_folder(log_path: str): + log_path = os.path.abspath(log_path).split(os.sep) + log_dir = (os.sep.join(log_path[:-1])) + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + @classmethod + def setup_logger(cls, log_path: str, debug: bool = False, clear_log: bool = False) -> None: + """ Sets-up the basic_logger + + Args: + log_path (str): The path where the log file will be saved + debug (bool): Whether to print debug messages or not + clear_log (bool): Whether to empty the log file or not + """ + cls.log_path = os.path.abspath(log_path) + if clear_log: + open(cls.log_path, 'w').close() + cls.log_level = logging.INFO if debug is not True else logging.DEBUG + fancy_log_logger.info(f"Logger is set. 
Log file path: {cls.log_path}") + + +fancy_log_logger = ColorizedLogger(logger_name='FancyLogger', color='white') diff --git a/youbot/main.py b/youbot/main.py new file mode 100644 index 0000000..8c84537 --- /dev/null +++ b/youbot/main.py @@ -0,0 +1,107 @@ +import traceback +import argparse + +from youbot import Configuration, ColorizedLogger, timeit, profileit, \ + DropboxCloudstore, MySqlDatastore, GmailEmailer + +basic_logger = ColorizedLogger(logger_name='Main', color='yellow') +fancy_logger = ColorizedLogger(logger_name='FancyMain', + color='blue', + on_color='on_red', + attrs=['underline', 'reverse', 'bold']) + + +def get_args() -> argparse.Namespace: + """Setup the argument parser + + Returns: + argparse.Namespace: + """ + parser = argparse.ArgumentParser( + description='A template for python projects.', + add_help=False) + # Required Args + required_args = parser.add_argument_group('Required Arguments') + config_file_params = { + 'type': argparse.FileType('r'), + 'required': True, + 'help': "The configuration yml file" + } + required_args.add_argument('-c', '--config-file', **config_file_params) + required_args.add_argument('-l', '--log', required=True, help="Name of the output log file") + # Optional args + optional_args = parser.add_argument_group('Optional Arguments') + optional_args.add_argument('-m', '--run-mode', choices=['run_mode_1', 'run_mode_2', 'run_mode_3'], + default='run_mode_1', + help='Description of the run modes') + optional_args.add_argument('-d', '--debug', action='store_true', + help='Enables the debug log messages') + optional_args.add_argument("-h", "--help", action="help", help="Show this help message and exit") + + return parser.parse_args() + + +@timeit(custom_print="{func_name} took {duration:2.5f} sec(s) to run!") +def main(): + """This is the main function of main.py + + Example: + python youbot/main.py -m run_mode_1 + -c confs/template_conf.yml + -l logs/output.log + """ + + # Initializing + args = get_args() + 
ColorizedLogger.setup_logger(log_path=args.log, debug=args.debug, clear_log=True) + # Load the configuration + # configuration = Configuration(config_src=args.config_file, + # config_schema_path='yml_schema_strict.json') + configuration = Configuration(config_src=args.config_file) + # Prints + basic_logger.info("Starting in run mode: {0}".format(args.run_mode)) + basic_logger.info("Examples:") + fancy_logger.info("You can customize the logger like this") + fancy_logger.info("You can customize each log message differently", + color="green", on_color="on_white", attrs=[]) + basic_logger.info("If you want to print complete blank lines use nl(num_lines=<#>):") + basic_logger.nl(num_lines=2) + # Example timeit code block + basic_logger.info("You can use timeit either as a function Wrapper or a ContextManager:") + for i in range(5): + custom_print = f"{i}: " + "Iterating in a 10,000-number-range took {duration:2.5f} seconds." + skip = i in [1, 2, 3] + with timeit(custom_print=custom_print, skip=skip): + for _ in range(10000): + pass + # Example profileit code block + basic_logger.info( + "Lastly, you can use profileit either as a function Wrapper or a ContextManager:") + with profileit(): + # CloudStore + cloud_conf = configuration.get_config('cloudstore')[0] + if cloud_conf['type'] == 'dropbox' and cloud_conf['config']['api_key'] != 'DROPBOX_API_KEY': + dropbox_obj = DropboxCloudstore(config=cloud_conf['config']) + basic_logger.info(f"Base folder contents in dropbox:\n{dropbox_obj.ls().keys()}") + # MySqlDatastore + cloud_conf = configuration.get_config('datastore')[0] + if cloud_conf['type'] == 'mysql' and cloud_conf['config']['username'] != 'MYSQL_USERNAME': + mysql_obj = MySqlDatastore(config=cloud_conf['config']) + basic_logger.info(f"List of tables in DB:\n{mysql_obj.show_tables()}") + # GmailEmailer + cloud_conf = configuration.get_config('emailer')[0] + if cloud_conf['type'] == 'gmail' and cloud_conf['config']['api_key'] != 'GMAIL_API_KEY': + 
basic_logger.info(f"Sending Sample Email to the email address set..") + gmail_obj = GmailEmailer(config=cloud_conf['config']) + gmail_obj.send_email(subject='starter', + to=[gmail_obj.email_address], + text='GmailEmailer works!') + basic_logger.info(f"Done!") + + +if __name__ == '__main__': + try: + main() + except Exception as e: + basic_logger.error(str(e) + '\n' + str(traceback.format_exc())) + raise e diff --git a/youbot/profiling_funcs/__init__.py b/youbot/profiling_funcs/__init__.py new file mode 100644 index 0000000..a78b955 --- /dev/null +++ b/youbot/profiling_funcs/__init__.py @@ -0,0 +1,7 @@ +"""Profileit sub-package of YoutubeCommentBot.""" + +from .profileit import profileit + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" diff --git a/youbot/profiling_funcs/profileit.py b/youbot/profiling_funcs/profileit.py new file mode 100644 index 0000000..91b8602 --- /dev/null +++ b/youbot/profiling_funcs/profileit.py @@ -0,0 +1,114 @@ +from contextlib import ContextDecorator +from typing import Callable, IO, List +from io import StringIO +from functools import wraps +import cProfile +import pstats + +from youbot import ColorizedLogger + +profile_logger = ColorizedLogger('Profileit', 'white') + + +class profileit(ContextDecorator): + custom_print: str + profiler: cProfile.Profile + stream: StringIO + sort_by: str + keep_only_these: List + fraction: float + skip: bool + profiler_output: str + file: IO + + def __init__(self, **kwargs): + """Decorator/ContextManager for profiling functions and code blocks + + Args: + custom_print: Custom print string. When used as decorator it can also be formatted using + `func_name`, `args`, and {0}, {1}, .. to reference the function's + first, second, ... argument. 
+ sort_by: pstats sorting column + profiler_output: Filepath where to save the profiling results (.o file) + keep_only_these: List of strings - grep on the profiling output and print only lines + containing any of these strings + fraction: pstats.print_stats() fraction argument + skip: If True, don't time this time. Suitable when inside loops + file: Write the timing output to a file too + """ + + self.profiler = cProfile.Profile() + self.stream = StringIO() + self.sort_by = 'stdname' + self.keep_only_these = [] + self.fraction = 1.0 + self.skip = False + self.__dict__.update(kwargs) + + def __call__(self, func: Callable): + """ This is called only when invoked as a decorator + + Args: + func: The method to wrap + """ + + @wraps(func) + def profiled(*args, **kwargs): + with self._recreate_cm(): + self.func_name = func.__name__ + self.args = args + self.kwargs = kwargs + self.all_args = (*args, *kwargs.values()) if kwargs != {} else args + return func(*args, **kwargs) + + return profiled + + def __enter__(self, *args, **kwargs): + if not self.skip: + self.profiler.enable() + return self + + def __exit__(self, type, value, traceback): + if self.skip: + return + + self.profiler.disable() + ps = pstats.Stats(self.profiler, stream=self.stream).sort_stats(self.sort_by) + ps.print_stats(self.fraction) + + # If used as a decorator + if hasattr(self, 'func_name'): + if not hasattr(self, 'custom_print'): + print_string = 'Func: {func_name!r} with args: {args!r} profiled:' + else: + print_string = self.custom_print + print_string = print_string.format(*self.args, func_name=self.func_name, + args=self.all_args, + **self.kwargs) + # If used as contextmanager + else: + if not hasattr(self, 'custom_print'): + print_string = 'Code block profiled:' + else: + print_string = self.custom_print + + # Get Profiling results + prof_res = self.stream.getvalue() + if len(self.keep_only_these) > 0: + # Keep only lines containing the specified words + prof_res_list = [line for line in 
prof_res.split('\n') + if any(keep_word in line for keep_word in self.keep_only_these)] + prof_res = '\n'.join(prof_res_list) + + # Print to file if requested + if hasattr(self, 'file'): + self.file.write(print_string) + self.file.write("\n%s" % prof_res) + + # Save profiler output to a file if requested + if hasattr(self, 'profiler_output'): + self.profiler.dump_stats(self.profiler_output) + + # Actual Print + profile_logger.info(print_string) + profile_logger.info("%s", prof_res) diff --git a/youbot/timing_tools/__init__.py b/youbot/timing_tools/__init__.py new file mode 100644 index 0000000..9284b5c --- /dev/null +++ b/youbot/timing_tools/__init__.py @@ -0,0 +1,8 @@ +"""Timeit sub-package of YoutubeCommentBot.""" + +from .timeit import timeit + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" + diff --git a/youbot/timing_tools/timeit.py b/youbot/timing_tools/timeit.py new file mode 100644 index 0000000..7baf384 --- /dev/null +++ b/youbot/timing_tools/timeit.py @@ -0,0 +1,79 @@ +from contextlib import ContextDecorator +from typing import Callable, IO +from functools import wraps +from time import time + +from youbot import ColorizedLogger + +time_logger = ColorizedLogger('Timeit', 'white') + + +class timeit(ContextDecorator): + custom_print: str + skip: bool + file: IO + + def __init__(self, **kwargs): + """Decorator/ContextManager for counting the execution times of functions and code blocks + + Args: + custom_print: Custom print string Use {duration} to reference the running time. + When used as decorator it can also be formatted using + `func_name`, `args`, and {0}, {1}, .. to reference the function's + first, second, ... argument. + skip: If True, don't time this time. 
Suitable when inside loops + file: Write the timing output to a file too + """ + + self.total = None + self.skip = False + self.internal_only = False + self.__dict__.update(kwargs) + + def __call__(self, func: Callable): + """ This is called only when invoked as a decorator + + Args: + func: The method to wrap + """ + + @wraps(func) + def timed(*args, **kwargs): + with self._recreate_cm(): + self.func_name = func.__name__ + self.args = args + self.kwargs = kwargs + self.all_args = (*args, *kwargs.values()) if kwargs != {} else args + return func(*args, **kwargs) + + return timed + + def __enter__(self, *args, **kwargs): + if not self.skip: + self.ts = time() + return self + + def __exit__(self, type, value, traceback): + if self.skip: + return + + self.te = time() + self.total = self.te - self.ts + if hasattr(self, 'func_name'): + if not hasattr(self, 'custom_print'): + print_string = 'Func: {func_name!r} with args: {args!r} took: {duration:2.5f} sec(s)' + else: + print_string = self.custom_print + time_logger.info(print_string.format(*self.args, func_name=self.func_name, + args=self.all_args, + duration=self.total, + **self.kwargs)) + else: + if not hasattr(self, 'custom_print'): + print_string = 'Code block took: {duration:2.5f} sec(s)' + else: + print_string = self.custom_print + if hasattr(self, 'file'): + self.file.write(print_string.format(duration=self.total)) + if not self.internal_only: + time_logger.info(print_string.format(duration=self.total)) diff --git a/youtubeapi.py b/youtubeapi.py deleted file mode 100644 index 030a105..0000000 --- a/youtubeapi.py +++ /dev/null @@ -1,144 +0,0 @@ -import httplib2 -import os -import sys -import dateutil.parser -from datetime import datetime, timezone, timedelta -from apiclient.discovery import build -from oauth2client.client import flow_from_clientsecrets -from oauth2client.file import Storage -from oauth2client.tools import argparser, run_flow -from oauth2client.client import OAuth2WebServerFlow - - -class 
YouTube(): - MISSING_CLIENT_SECRETS_MESSAGE = """ - WARNING: Please configure OAuth 2.0 - You will need to provide a client_secrets.json file - """ - - CLIENT_SECRETS_FILE = "keys/client_secrets.json" # The location of the secrets file - CLIENT_ID = "Your Client Id" - CLIENT_SECRET = "Your Client Secret" - YOUTUBE_READONLY_SCOPE = 'https://www.googleapis.com/auth/youtube.force-ssl' - YOUTUBE_API_SERVICE_NAME = "youtube" - YOUTUBE_API_VERSION = "v3" - - - def __init__(self): - """Returns an instance of a wrapper for the YouTube API. - - Uses a file called client_secrets.json to initialize access to the YouTube API - using the 'readonly' scope. - """ - flow = OAuth2WebServerFlow(client_id=self.CLIENT_ID, - client_secret=self.CLIENT_SECRET, - scope=self.YOUTUBE_READONLY_SCOPE) - - storage = Storage("keys/%s-oauth2.json" % sys.argv[0]) - self.credentials = storage.get() - - if self.credentials is None or self.credentials.invalid: - flags = argparser.parse_args() - self.credentials = run_flow(flow, storage, flags) - - self.api = build(self.YOUTUBE_API_SERVICE_NAME, self.YOUTUBE_API_VERSION, - http = self.credentials.authorize(httplib2.Http())) - - - def channel_from_response(self, response): - """Transforms a YouTube API response into a channel object""" - for channel in response['items']: - result = dict() - result['id'] = channel['id'] - result['username'] = channel['snippet']['title'] - result['title'] = None - result['added_on'] = datetime.utcnow().isoformat() - result['last_checked'] = (datetime.utcnow() - timedelta(days=1)).isoformat() - return result - return None - - - def get_channel_by_id(self, id): - """Queries YouTube for a channel using the specified id - - Args: - id (str): The channel ID to search for - """ - channels_response = self.api.channels().list( - id = id, - part = "snippet", - fields = 'items(id,snippet(title))' - ).execute() - return self.channel_from_response(channels_response) - - - def get_channel_by_username(self, username): - """Queries 
YouTube for a channel using the specified username - - Args: - username (str): The username to search for - """ - channels_response = self.api.channels().list( - forUsername = username, - part = "snippet", - fields = 'items(id,snippet(title))' - ).execute() - channel = self.channel_from_response(channels_response) - if channel is not None: - channel['username'] = username - return channel - - - def get_uploads_playlist(self, uploads_list_id, last_checked): - """Retrieves uploads using the specified playlist ID which - were have been added since the last check. - - Args: - uploads_list_id (str): The ID of the uploads playlist - last_checked (datetime.datetime): When the channel was last checked - """ - playlistitems_request = self.api.playlistItems().list( - playlistId = uploads_list_id, - part = "snippet", - fields = 'items(id,snippet(title,publishedAt,resourceId(videoId)))', - maxResults = 50 - ) - - while playlistitems_request: - playlistitems_response = playlistitems_request.execute() - - for playlist_item in playlistitems_response["items"]: - publishedAt = dateutil.parser.parse(playlist_item['snippet']['publishedAt']) - if (publishedAt >= last_checked):#(last_checked - timedelta(seconds=14)).replace(tzinfo = timezone.utc)): - video = dict() - video['id'] = playlist_item["snippet"]["resourceId"]["videoId"] - video['published_at'] = playlist_item["snippet"]["publishedAt"] - video['title'] = playlist_item["snippet"]["title"] - yield video - else: - return - - playlistitems_request = self.api.playlistItems().list_next( - playlistitems_request, playlistitems_response - ) - - - def get_uploads(self, channels): - """Retrieves new uploads for the specified channels - - Args: - channels(dict): The channels to check (format channel_id => last_checked) - """ - channels_response = self.api.channels().list( - id = ",".join(channels.keys()), - part = "contentDetails,snippet", - fields = "items(id,contentDetails(relatedPlaylists(uploads)),snippet(title))" - ).execute() - - 
for channel in channels_response["items"]: - uploads_list_id = channel["contentDetails"]["relatedPlaylists"]["uploads"] - last_checked = dateutil.parser.parse(channels[channel['id']]) - last_checked = last_checked.replace(tzinfo = timezone.utc) - for upload in self.get_uploads_playlist(uploads_list_id, last_checked): - upload['channel_title'] = channel['snippet']['title'] - yield upload From 891b2b6d2403e0f6c3bbbdc2f56ea54f2d8a3f19 Mon Sep 17 00:00:00 2001 From: drkostas Date: Tue, 1 Jun 2021 14:43:13 -0400 Subject: [PATCH 02/33] Cleaned up template #4 --- Procfile | 4 +- TODO.md | 8 +- youbot/__init__.py | 2 - youbot/cli.py | 22 ------ youbot/profiling_funcs/__init__.py | 7 -- youbot/profiling_funcs/profileit.py | 114 ---------------------------- youbot/timing_tools/__init__.py | 8 -- youbot/timing_tools/timeit.py | 79 ------------------- 8 files changed, 6 insertions(+), 238 deletions(-) delete mode 100644 youbot/cli.py delete mode 100644 youbot/profiling_funcs/__init__.py delete mode 100644 youbot/profiling_funcs/profileit.py delete mode 100644 youbot/timing_tools/__init__.py delete mode 100644 youbot/timing_tools/timeit.py diff --git a/Procfile b/Procfile index dabfd4f..467ee6e 100644 --- a/Procfile +++ b/Procfile @@ -1,4 +1,2 @@ run_tests: make run_tests -main: python youbot/main.py -m run_mode_1 -c ../confs/template_conf.yml -l logs/output.log -cli_hello: python youbot/cli.py hello drkostas -cli_bye: python youbot/cli.py bye drkostas --formal \ No newline at end of file +main: python youbot/main.py -m run_mode_1 -c ../confs/template_conf.yml -l logs/output.log \ No newline at end of file diff --git a/TODO.md b/TODO.md index 7679c7c..c778af3 100644 --- a/TODO.md +++ b/TODO.md @@ -1,5 +1,7 @@ # TODO See the [issues](https://github.com/drkostas/youbot/issues) too. 
-- [X] Create Tests -- [X] Create Readme -- [ ] Stop Global Warming +- [X] Load starter +- [ ] Build a Youtube class +- [ ] Create child mysql class +- [ ] Customize configurations +- [ ] Rebuild the main diff --git a/youbot/__init__.py b/youbot/__init__.py index 207593d..2b689dd 100644 --- a/youbot/__init__.py +++ b/youbot/__init__.py @@ -1,8 +1,6 @@ """Top-level package for YoutubeCommentBot.""" from youbot.fancy_logger import ColorizedLogger -from youbot.timing_tools import timeit -from youbot.profiling_funcs import profileit from youbot.configuration import Configuration, validate_json_schema from youbot.cloudstore import DropboxCloudstore from youbot.datastore import MySqlDatastore diff --git a/youbot/cli.py b/youbot/cli.py deleted file mode 100644 index e493b7f..0000000 --- a/youbot/cli.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Command line interface for youbot.""" - -import typer - -app = typer.Typer() - - -@app.command() -def hello(name: str): - typer.echo(f"Hello {name}") - - -@app.command() -def bye(name: str, formal: bool = False): - if formal: - typer.echo(f"Goodbye Mr. {name}. 
Have a good day.") - else: - typer.echo(f"Bye {name}!") - - -if __name__ == "__main__": - app() diff --git a/youbot/profiling_funcs/__init__.py b/youbot/profiling_funcs/__init__.py deleted file mode 100644 index a78b955..0000000 --- a/youbot/profiling_funcs/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Profileit sub-package of YoutubeCommentBot.""" - -from .profileit import profileit - -__author__ = "drkostas" -__email__ = "georgiou.kostas94@gmail.com" -__version__ = "2.0" diff --git a/youbot/profiling_funcs/profileit.py b/youbot/profiling_funcs/profileit.py deleted file mode 100644 index 91b8602..0000000 --- a/youbot/profiling_funcs/profileit.py +++ /dev/null @@ -1,114 +0,0 @@ -from contextlib import ContextDecorator -from typing import Callable, IO, List -from io import StringIO -from functools import wraps -import cProfile -import pstats - -from youbot import ColorizedLogger - -profile_logger = ColorizedLogger('Profileit', 'white') - - -class profileit(ContextDecorator): - custom_print: str - profiler: cProfile.Profile - stream: StringIO - sort_by: str - keep_only_these: List - fraction: float - skip: bool - profiler_output: str - file: IO - - def __init__(self, **kwargs): - """Decorator/ContextManager for profiling functions and code blocks - - Args: - custom_print: Custom print string. When used as decorator it can also be formatted using - `func_name`, `args`, and {0}, {1}, .. to reference the function's - first, second, ... argument. - sort_by: pstats sorting column - profiler_output: Filepath where to save the profiling results (.o file) - keep_only_these: List of strings - grep on the profiling output and print only lines - containing any of these strings - fraction: pstats.print_stats() fraction argument - skip: If True, don't time this time. 
Suitable when inside loops - file: Write the timing output to a file too - """ - - self.profiler = cProfile.Profile() - self.stream = StringIO() - self.sort_by = 'stdname' - self.keep_only_these = [] - self.fraction = 1.0 - self.skip = False - self.__dict__.update(kwargs) - - def __call__(self, func: Callable): - """ This is called only when invoked as a decorator - - Args: - func: The method to wrap - """ - - @wraps(func) - def profiled(*args, **kwargs): - with self._recreate_cm(): - self.func_name = func.__name__ - self.args = args - self.kwargs = kwargs - self.all_args = (*args, *kwargs.values()) if kwargs != {} else args - return func(*args, **kwargs) - - return profiled - - def __enter__(self, *args, **kwargs): - if not self.skip: - self.profiler.enable() - return self - - def __exit__(self, type, value, traceback): - if self.skip: - return - - self.profiler.disable() - ps = pstats.Stats(self.profiler, stream=self.stream).sort_stats(self.sort_by) - ps.print_stats(self.fraction) - - # If used as a decorator - if hasattr(self, 'func_name'): - if not hasattr(self, 'custom_print'): - print_string = 'Func: {func_name!r} with args: {args!r} profiled:' - else: - print_string = self.custom_print - print_string = print_string.format(*self.args, func_name=self.func_name, - args=self.all_args, - **self.kwargs) - # If used as contextmanager - else: - if not hasattr(self, 'custom_print'): - print_string = 'Code block profiled:' - else: - print_string = self.custom_print - - # Get Profiling results - prof_res = self.stream.getvalue() - if len(self.keep_only_these) > 0: - # Keep only lines containing the specified words - prof_res_list = [line for line in prof_res.split('\n') - if any(keep_word in line for keep_word in self.keep_only_these)] - prof_res = '\n'.join(prof_res_list) - - # Print to file if requested - if hasattr(self, 'file'): - self.file.write(print_string) - self.file.write("\n%s" % prof_res) - - # Save profiler output to a file if requested - if hasattr(self, 
'profiler_output'): - self.profiler.dump_stats(self.profiler_output) - - # Actual Print - profile_logger.info(print_string) - profile_logger.info("%s", prof_res) diff --git a/youbot/timing_tools/__init__.py b/youbot/timing_tools/__init__.py deleted file mode 100644 index 9284b5c..0000000 --- a/youbot/timing_tools/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Timeit sub-package of YoutubeCommentBot.""" - -from .timeit import timeit - -__author__ = "drkostas" -__email__ = "georgiou.kostas94@gmail.com" -__version__ = "2.0" - diff --git a/youbot/timing_tools/timeit.py b/youbot/timing_tools/timeit.py deleted file mode 100644 index 7baf384..0000000 --- a/youbot/timing_tools/timeit.py +++ /dev/null @@ -1,79 +0,0 @@ -from contextlib import ContextDecorator -from typing import Callable, IO -from functools import wraps -from time import time - -from youbot import ColorizedLogger - -time_logger = ColorizedLogger('Timeit', 'white') - - -class timeit(ContextDecorator): - custom_print: str - skip: bool - file: IO - - def __init__(self, **kwargs): - """Decorator/ContextManager for counting the execution times of functions and code blocks - - Args: - custom_print: Custom print string Use {duration} to reference the running time. - When used as decorator it can also be formatted using - `func_name`, `args`, and {0}, {1}, .. to reference the function's - first, second, ... argument. - skip: If True, don't time this time. 
Suitable when inside loops - file: Write the timing output to a file too - """ - - self.total = None - self.skip = False - self.internal_only = False - self.__dict__.update(kwargs) - - def __call__(self, func: Callable): - """ This is called only when invoked as a decorator - - Args: - func: The method to wrap - """ - - @wraps(func) - def timed(*args, **kwargs): - with self._recreate_cm(): - self.func_name = func.__name__ - self.args = args - self.kwargs = kwargs - self.all_args = (*args, *kwargs.values()) if kwargs != {} else args - return func(*args, **kwargs) - - return timed - - def __enter__(self, *args, **kwargs): - if not self.skip: - self.ts = time() - return self - - def __exit__(self, type, value, traceback): - if self.skip: - return - - self.te = time() - self.total = self.te - self.ts - if hasattr(self, 'func_name'): - if not hasattr(self, 'custom_print'): - print_string = 'Func: {func_name!r} with args: {args!r} took: {duration:2.5f} sec(s)' - else: - print_string = self.custom_print - time_logger.info(print_string.format(*self.args, func_name=self.func_name, - args=self.all_args, - duration=self.total, - **self.kwargs)) - else: - if not hasattr(self, 'custom_print'): - print_string = 'Code block took: {duration:2.5f} sec(s)' - else: - print_string = self.custom_print - if hasattr(self, 'file'): - self.file.write(print_string.format(duration=self.total)) - if not self.internal_only: - time_logger.info(print_string.format(duration=self.total)) From 2eacbf83fa25a0accc8a4cecc6d854195d39e5fb Mon Sep 17 00:00:00 2001 From: drkostas Date: Thu, 3 Jun 2021 16:21:37 -0400 Subject: [PATCH 03/33] Create a YoutubeManager class and modified the main.py #4 --- TODO.md | 1 + confs/commenter.yml | 25 ++++++++++ youbot/__init__.py | 1 + youbot/main.py | 65 +++++-------------------- youbot/youtube_utils/__init__.py | 7 +++ youbot/youtube_utils/youtube_manager.py | 36 ++++++++++++++ 6 files changed, 82 insertions(+), 53 deletions(-) create mode 100644 confs/commenter.yml 
create mode 100644 youbot/youtube_utils/__init__.py create mode 100644 youbot/youtube_utils/youtube_manager.py diff --git a/TODO.md b/TODO.md index c778af3..aabb3cd 100644 --- a/TODO.md +++ b/TODO.md @@ -5,3 +5,4 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [ ] Create child mysql class - [ ] Customize configurations - [ ] Rebuild the main +- [ ] Get channel name automatically diff --git a/confs/commenter.yml b/confs/commenter.yml new file mode 100644 index 0000000..2951c9f --- /dev/null +++ b/confs/commenter.yml @@ -0,0 +1,25 @@ +tag: dev +cloudstore: + - config: + api_key: !ENV ${DROPBOX_API_KEY} + type: dropbox +datastore: + - config: + hostname: !ENV ${MYSQL_HOST} + username: !ENV ${MYSQL_USERNAME} + password: !ENV ${MYSQL_PASSWORD} + db_name: !ENV ${MYSQL_DB_NAME} + port: 3306 + type: mysql +emailer: + - config: + email_address: !ENV ${EMAIL_ADDRESS} + api_key: !ENV ${GMAIL_API_KEY} + type: gmail +youtube: + - config: + client_id: test + client_secret: test2 + api_version: v3 + read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl + channel: mychannel_name diff --git a/youbot/__init__.py b/youbot/__init__.py index 2b689dd..b4223b3 100644 --- a/youbot/__init__.py +++ b/youbot/__init__.py @@ -5,6 +5,7 @@ from youbot.cloudstore import DropboxCloudstore from youbot.datastore import MySqlDatastore from youbot.emailer import GmailEmailer +from youbot.youtube_utils import YoutubeManagerV3 __author__ = "drkostas" __email__ = "georgiou.kostas94@gmail.com" diff --git a/youbot/main.py b/youbot/main.py index 8c84537..0758184 100644 --- a/youbot/main.py +++ b/youbot/main.py @@ -1,14 +1,10 @@ import traceback import argparse -from youbot import Configuration, ColorizedLogger, timeit, profileit, \ - DropboxCloudstore, MySqlDatastore, GmailEmailer +from youbot import Configuration, ColorizedLogger, \ + DropboxCloudstore, MySqlDatastore, GmailEmailer, YoutubeManagerV3 -basic_logger = ColorizedLogger(logger_name='Main', color='yellow') 
-fancy_logger = ColorizedLogger(logger_name='FancyMain', - color='blue', - on_color='on_red', - attrs=['underline', 'reverse', 'bold']) +logger = ColorizedLogger(logger_name='Main', color='yellow') def get_args() -> argparse.Namespace: @@ -41,13 +37,12 @@ def get_args() -> argparse.Namespace: return parser.parse_args() -@timeit(custom_print="{func_name} took {duration:2.5f} sec(s) to run!") def main(): - """This is the main function of main.py + """ This is the main function of main.py Example: python youbot/main.py -m run_mode_1 - -c confs/template_conf.yml + -c confs/conf.yml -l logs/output.log """ @@ -55,53 +50,17 @@ def main(): args = get_args() ColorizedLogger.setup_logger(log_path=args.log, debug=args.debug, clear_log=True) # Load the configuration - # configuration = Configuration(config_src=args.config_file, - # config_schema_path='yml_schema_strict.json') - configuration = Configuration(config_src=args.config_file) - # Prints - basic_logger.info("Starting in run mode: {0}".format(args.run_mode)) - basic_logger.info("Examples:") - fancy_logger.info("You can customize the logger like this") - fancy_logger.info("You can customize each log message differently", - color="green", on_color="on_white", attrs=[]) - basic_logger.info("If you want to print complete blank lines use nl(num_lines=<#>):") - basic_logger.nl(num_lines=2) - # Example timeit code block - basic_logger.info("You can use timeit either as a function Wrapper or a ContextManager:") - for i in range(5): - custom_print = f"{i}: " + "Iterating in a 10,000-number-range took {duration:2.5f} seconds." 
- skip = i in [1, 2, 3] - with timeit(custom_print=custom_print, skip=skip): - for _ in range(10000): - pass - # Example profileit code block - basic_logger.info( - "Lastly, you can use profileit either as a function Wrapper or a ContextManager:") - with profileit(): - # CloudStore - cloud_conf = configuration.get_config('cloudstore')[0] - if cloud_conf['type'] == 'dropbox' and cloud_conf['config']['api_key'] != 'DROPBOX_API_KEY': - dropbox_obj = DropboxCloudstore(config=cloud_conf['config']) - basic_logger.info(f"Base folder contents in dropbox:\n{dropbox_obj.ls().keys()}") - # MySqlDatastore - cloud_conf = configuration.get_config('datastore')[0] - if cloud_conf['type'] == 'mysql' and cloud_conf['config']['username'] != 'MYSQL_USERNAME': - mysql_obj = MySqlDatastore(config=cloud_conf['config']) - basic_logger.info(f"List of tables in DB:\n{mysql_obj.show_tables()}") - # GmailEmailer - cloud_conf = configuration.get_config('emailer')[0] - if cloud_conf['type'] == 'gmail' and cloud_conf['config']['api_key'] != 'GMAIL_API_KEY': - basic_logger.info(f"Sending Sample Email to the email address set..") - gmail_obj = GmailEmailer(config=cloud_conf['config']) - gmail_obj.send_email(subject='starter', - to=[gmail_obj.email_address], - text='GmailEmailer works!') - basic_logger.info(f"Done!") + conf_obj = Configuration(config_src=args.config_file) + you_conf = conf_obj.get_config('youtube')[0] + # Setup Youtube API + youmanager = YoutubeManagerV3(config=you_conf['config'], channel_name=you_conf['channel']) + logger.info(youmanager.channel_name) + logger.info(youmanager._api) if __name__ == '__main__': try: main() except Exception as e: - basic_logger.error(str(e) + '\n' + str(traceback.format_exc())) + logger.error(str(e) + '\n' + str(traceback.format_exc())) raise e diff --git a/youbot/youtube_utils/__init__.py b/youbot/youtube_utils/__init__.py new file mode 100644 index 0000000..e40960d --- /dev/null +++ b/youbot/youtube_utils/__init__.py @@ -0,0 +1,7 @@ +"""Youtube 
Utils sub-package of YoutubeCommentBot.""" + +from .youtube_manager import YoutubeManagerV3 + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py new file mode 100644 index 0000000..92fe72e --- /dev/null +++ b/youbot/youtube_utils/youtube_manager.py @@ -0,0 +1,36 @@ +from typing import List, Tuple, Dict +from abc import ABC, abstractmethod + +from youbot import ColorizedLogger + +logger = ColorizedLogger('YoutubeManager') + + +class AbstractYoutubeManager(ABC): + __slots__ = ('channel_name', '_api') + + @abstractmethod + def __init__(self, config: Dict, channel_name: str) -> None: + """ + The basic constructor. Creates a new instance of YoutubeManager using the specified credentials + + :param config: + """ + + self.channel_name = channel_name + self._api = self._build_api(**config) + + @staticmethod + @abstractmethod + def _build_api(*args, **kwargs): + pass + + +class YoutubeManagerV3(AbstractYoutubeManager): + def __init__(self, config: Dict, channel_name: str): + super().__init__(config, channel_name) + + @staticmethod + def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str): + # Build a youtube api connection + return 'test' From 8c5cb3a5899fe3c99d2b04e9315a8b4e78e68f59 Mon Sep 17 00:00:00 2001 From: drkostas Date: Thu, 3 Jun 2021 23:45:13 -0400 Subject: [PATCH 04/33] Created oath connection with google api #4 --- confs/commenter.yml | 6 ++--- requirements.txt | 5 ++++ youbot/main.py | 11 ++++---- youbot/youtube_utils/youtube_manager.py | 35 ++++++++++++++++++++----- 4 files changed, 41 insertions(+), 16 deletions(-) diff --git a/confs/commenter.yml b/confs/commenter.yml index 2951c9f..d9c158a 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -1,4 +1,4 @@ -tag: dev +tag: commenter cloudstore: - config: api_key: !ENV ${DROPBOX_API_KEY} @@ -18,8 +18,8 @@ emailer: type: gmail youtube: - 
config: - client_id: test - client_secret: test2 + client_id: !ENV ${CLIENT_ID} + client_secret: !ENV ${CLIENT_SECRET} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl channel: mychannel_name diff --git a/requirements.txt b/requirements.txt index a3ce04e..c2de485 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,14 @@ dropbox~=11.10.0 gmail~=0.6.3 +google-api-python-client==2.7.0 +google-auth-oauthlib==0.4.4 jsonschema~=3.2.0 +httplib2==0.19.1 mysql-connector-python~=8.0.19 mysql-connector~=2.2.9 +oauth2client==4.1.3 PyYAML~=5.4.1 +requests~=2.25.1 setuptools~=52.0.0 termcolor~=1.1.0 typer~=0.3.2 diff --git a/youbot/main.py b/youbot/main.py index 0758184..07cd00d 100644 --- a/youbot/main.py +++ b/youbot/main.py @@ -41,9 +41,7 @@ def main(): """ This is the main function of main.py Example: - python youbot/main.py -m run_mode_1 - -c confs/conf.yml - -l logs/output.log + python youbot/main.py -m run_mode_1 -c confs/conf.yml -l logs/output.log """ # Initializing @@ -53,9 +51,10 @@ def main(): conf_obj = Configuration(config_src=args.config_file) you_conf = conf_obj.get_config('youtube')[0] # Setup Youtube API - youmanager = YoutubeManagerV3(config=you_conf['config'], channel_name=you_conf['channel']) - logger.info(youmanager.channel_name) - logger.info(youmanager._api) + yout_manager = YoutubeManagerV3(config=you_conf['config'], + channel_name=you_conf['channel'], + tag=conf_obj.tag) + logger.info(yout_manager.channel_name) if __name__ == '__main__': diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 92fe72e..d66b900 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -1,5 +1,11 @@ from typing import List, Tuple, Dict from abc import ABC, abstractmethod +import os +from oauth2client.file import Storage +from oauth2client.tools import argparser, run_flow +from oauth2client.client import OAuth2WebServerFlow +from 
googleapiclient.discovery import build +import httplib2 from youbot import ColorizedLogger @@ -7,10 +13,10 @@ class AbstractYoutubeManager(ABC): - __slots__ = ('channel_name', '_api') + __slots__ = ('channel_name', '_api', 'tag') @abstractmethod - def __init__(self, config: Dict, channel_name: str) -> None: + def __init__(self, config: Dict, channel_name: str, tag: str) -> None: """ The basic constructor. Creates a new instance of YoutubeManager using the specified credentials @@ -18,7 +24,8 @@ def __init__(self, config: Dict, channel_name: str) -> None: """ self.channel_name = channel_name - self._api = self._build_api(**config) + self.tag = tag + self._api = self._build_api(**config, tag=self.tag) @staticmethod @abstractmethod @@ -27,10 +34,24 @@ def _build_api(*args, **kwargs): class YoutubeManagerV3(AbstractYoutubeManager): - def __init__(self, config: Dict, channel_name: str): - super().__init__(config, channel_name) + def __init__(self, config: Dict, channel_name: str, tag: str): + super().__init__(config, channel_name, tag) + print(type(self._api)) @staticmethod - def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str): + def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str, + tag: str): # Build a youtube api connection - return 'test' + flow = OAuth2WebServerFlow(client_id=client_id, + client_secret=client_secret, + scope=read_only_scope) + key_path = os.path.join('..', 'keys', f'{tag}.json') + storage = Storage(key_path) + credentials = storage.get() + + if credentials is None or credentials.invalid: + flags = argparser.parse_args(args=['--noauth_local_webserver']) + credentials = run_flow(flow, storage, flags) + + api = build('youtube', api_version, http=credentials.authorize(httplib2.Http())) + return api From 1a7d069a42c197997567e0aed5508dba6ad9738a Mon Sep 17 00:00:00 2001 From: drkostas Date: Fri, 4 Jun 2021 13:45:41 -0400 Subject: [PATCH 05/33] Created functions for get the 
channel info, the upload playlist ids, and the latest videos #4 --- requirements.txt | 11 +- youbot/main.py | 8 +- youbot/youtube_utils/youtube_manager.py | 158 +++++++++++++++++++++++- 3 files changed, 167 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index c2de485..826dc79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,15 @@ dropbox~=11.10.0 gmail~=0.6.3 -google-api-python-client==2.7.0 -google-auth-oauthlib==0.4.4 +google-api-python-client~=2.7.0 +google-auth-oauthlib~=0.4.4 jsonschema~=3.2.0 -httplib2==0.19.1 +httplib2~=0.19.1 mysql-connector-python~=8.0.19 mysql-connector~=2.2.9 -oauth2client==4.1.3 +oauth2client~=4.1.3 +python-dateutil~=2.8.1 PyYAML~=5.4.1 requests~=2.25.1 setuptools~=52.0.0 termcolor~=1.1.0 -typer~=0.3.2 +typer~=0.3.2 \ No newline at end of file diff --git a/youbot/main.py b/youbot/main.py index 07cd00d..8b5fd4e 100644 --- a/youbot/main.py +++ b/youbot/main.py @@ -54,8 +54,14 @@ def main(): yout_manager = YoutubeManagerV3(config=you_conf['config'], channel_name=you_conf['channel'], tag=conf_obj.tag) - logger.info(yout_manager.channel_name) + # Test the video retrieval for 3 channels + pewd_info = yout_manager.get_channel_info_by_username('Pewdiepie') + v_info = yout_manager.get_channel_info_by_username('Veritasium') + ku_info = yout_manager.get_channel_info_by_username('Kurzgesagt') + channel_ids = [pewd_info['id'], v_info['id'], ku_info['id']] + for video in yout_manager.get_uploads(channels=channel_ids): + logger.info(video) if __name__ == '__main__': try: diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index d66b900..04ff794 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -1,9 +1,13 @@ -from typing import List, Tuple, Dict +from typing import List, Tuple, Dict, Union from abc import ABC, abstractmethod import os +import math +from datetime import datetime, timedelta, timezone +import 
dateutil.parser from oauth2client.file import Storage from oauth2client.tools import argparser, run_flow from oauth2client.client import OAuth2WebServerFlow +import googleapiclient from googleapiclient.discovery import build import httplib2 @@ -36,12 +40,21 @@ def _build_api(*args, **kwargs): class YoutubeManagerV3(AbstractYoutubeManager): def __init__(self, config: Dict, channel_name: str, tag: str): super().__init__(config, channel_name, tag) - print(type(self._api)) @staticmethod def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str, - tag: str): - # Build a youtube api connection + tag: str) -> googleapiclient.discovery.Resource: + """ + Build a youtube api connection. + + Args: + client_id: + client_secret: + api_version: + read_only_scope: + tag: + """ + flow = OAuth2WebServerFlow(client_id=client_id, client_secret=client_secret, scope=read_only_scope) @@ -55,3 +68,140 @@ def _build_api(client_id: str, client_secret: str, api_version: str, read_only_s api = build('youtube', api_version, http=credentials.authorize(httplib2.Http())) return api + + @staticmethod + def channel_from_response(response: Dict) -> Union[Dict, None]: + """ + Transforms a YouTube API response into a channel Dict. 
+ + Args: + response: + """ + + for channel in response['items']: + result = dict() + result['id'] = channel['id'] + result['username'] = channel['snippet']['title'] + result['title'] = None + result['added_on'] = datetime.utcnow().isoformat() + result['last_commented'] = (datetime.utcnow() - timedelta(days=1)).isoformat() + return result + return None + + def get_channel_info_by_username(self, username: str) -> Union[Dict, None]: + """Queries YouTube for a channel using the specified username + + Args: + username (str): The username to search for + """ + + channels_response = self._api.channels().list( + forUsername=username, + part="snippet", + fields='items(id,snippet(title))' + ).execute() + if channels_response: + channel = self.channel_from_response(channels_response) + if channel is not None: + channel['username'] = username + else: + logger.warning(f"Got empty response for channel username: {username}") + channel = {} + return channel + + def get_channel_info_by_id(self, channel_id: str) -> Union[Dict, None]: + """ Queries YouTube for a channel using the specified channel id. + + Args: + channel_id (str): The channel ID to search for + """ + + channels_response = self._api.channels().list( + id=channel_id, + part="snippet", + fields='items(id,snippet(title))' + ).execute() + + return self.channel_from_response(channels_response) + + def get_uploads(self, channels: List) -> Dict: + """ Retrieves new uploads for the specified channels. 
+ + Args: + channels(list): A list with channel IDs + """ + + # Separate the channels list in 50-sized channel lists + channels_lists = self.split_list(channels, 50) + channels_to_check = [] + # Get the Playlist IDs of each channel + for channels in channels_lists: + channels_response = self._api.channels().list( + id=",".join(channels), + part="contentDetails,snippet", + fields="items(id,contentDetails(relatedPlaylists(uploads)),snippet(title))" + ).execute() + channels_to_check.extend(channels_response["items"]) + # For each playlist ID, get 50 videos + for channel in channels_to_check: + uploads_list_id = channel["contentDetails"]["relatedPlaylists"]["uploads"] + for upload in self.get_uploads_playlist(uploads_list_id): + upload['channel_title'] = channel['snippet']['title'] + upload['channel_id'] = channel['id'] + yield upload + + @staticmethod + def split_list(input_list: List, chunk_size: int) -> List: + """ + Split a list into `chunk_size` sub-lists. + + Args: + input_list: + chunk_size: + """ + + chunks = math.ceil(len(input_list) / chunk_size) + if chunks == 1: + output_list = [input_list] + else: + output_list = [] + end = 0 + for i in range(chunks - 1): + start = i * chunk_size + end = (i + 1) * chunk_size + output_list.append(input_list[start:end]) + output_list.append(input_list[end:]) + + return output_list + + def get_uploads_playlist(self, uploads_list_id: str) -> Dict: + """ Retrieves uploads using the specified playlist ID which were have been added + since the last check. 
+ + Args: + uploads_list_id (str): The ID of the uploads playlist + """ + + # Construct the request + playlist_items_request = self._api.playlistItems().list( + playlistId=uploads_list_id, + part="snippet", + fields='items(id,snippet(title,publishedAt,resourceId(videoId)))', + maxResults=50 + ) + + while playlist_items_request: + playlist_items_response = playlist_items_request.execute() + for playlist_item in playlist_items_response["items"]: + published_at = dateutil.parser.parse(playlist_item['snippet']['publishedAt']) + video = dict() + # Return the video only if it was published in the last 2 hours + if published_at >= (datetime.utcnow() - timedelta(hours=2)).replace( + tzinfo=timezone.utc): + video['id'] = playlist_item["snippet"]["resourceId"]["videoId"] + video['published_at'] = playlist_item["snippet"]["publishedAt"] + video['title'] = playlist_item["snippet"]["title"] + yield video + playlist_items_request = self._api.playlistItems().list_next( + playlist_items_request, playlist_items_response + ) From 7a0889778a0edf5d6d322f7cbe6228c0abb9c197 Mon Sep 17 00:00:00 2001 From: drkostas Date: Fri, 4 Jun 2021 14:06:31 -0400 Subject: [PATCH 06/33] Stop getting video IDs if they surpassed the time threshold set #4 --- .gitignore | 2 +- youbot/main.py | 5 +++-- youbot/youtube_utils/youtube_manager.py | 20 ++++++++++++-------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 95547f8..682cef8 100644 --- a/.gitignore +++ b/.gitignore @@ -135,7 +135,7 @@ dmypy.json /.idea # Tmp files -*tmp.* +*tmp*.* # Tars *.gz diff --git a/youbot/main.py b/youbot/main.py index 8b5fd4e..a639369 100644 --- a/youbot/main.py +++ b/youbot/main.py @@ -8,7 +8,7 @@ def get_args() -> argparse.Namespace: - """Setup the argument parser + """ Setup the argument parser. 
Returns: argparse.Namespace: @@ -60,9 +60,10 @@ def main(): v_info = yout_manager.get_channel_info_by_username('Veritasium') ku_info = yout_manager.get_channel_info_by_username('Kurzgesagt') channel_ids = [pewd_info['id'], v_info['id'], ku_info['id']] - for video in yout_manager.get_uploads(channels=channel_ids): + for video in yout_manager.get_uploads(channels=channel_ids, last_n_hours=12000): logger.info(video) + if __name__ == '__main__': try: main() diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 04ff794..3f65ddc 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -70,7 +70,7 @@ def _build_api(client_id: str, client_secret: str, api_version: str, read_only_s return api @staticmethod - def channel_from_response(response: Dict) -> Union[Dict, None]: + def _channel_from_response(response: Dict) -> Union[Dict, None]: """ Transforms a YouTube API response into a channel Dict. @@ -101,7 +101,7 @@ def get_channel_info_by_username(self, username: str) -> Union[Dict, None]: fields='items(id,snippet(title))' ).execute() if channels_response: - channel = self.channel_from_response(channels_response) + channel = self._channel_from_response(channels_response) if channel is not None: channel['username'] = username else: @@ -122,13 +122,14 @@ def get_channel_info_by_id(self, channel_id: str) -> Union[Dict, None]: fields='items(id,snippet(title))' ).execute() - return self.channel_from_response(channels_response) + return self._channel_from_response(channels_response) - def get_uploads(self, channels: List) -> Dict: + def get_uploads(self, channels: List, last_n_hours: int = 2) -> Dict: """ Retrieves new uploads for the specified channels. 
Args: channels(list): A list with channel IDs + last_n_hours: """ # Separate the channels list in 50-sized channel lists @@ -145,7 +146,7 @@ def get_uploads(self, channels: List) -> Dict: # For each playlist ID, get 50 videos for channel in channels_to_check: uploads_list_id = channel["contentDetails"]["relatedPlaylists"]["uploads"] - for upload in self.get_uploads_playlist(uploads_list_id): + for upload in self._get_uploads_playlist(uploads_list_id, last_n_hours): upload['channel_title'] = channel['snippet']['title'] upload['channel_id'] = channel['id'] yield upload @@ -174,7 +175,7 @@ def split_list(input_list: List, chunk_size: int) -> List: return output_list - def get_uploads_playlist(self, uploads_list_id: str) -> Dict: + def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> Dict: """ Retrieves uploads using the specified playlist ID which were have been added since the last check. @@ -195,13 +196,16 @@ def get_uploads_playlist(self, uploads_list_id: str) -> Dict: for playlist_item in playlist_items_response["items"]: published_at = dateutil.parser.parse(playlist_item['snippet']['publishedAt']) video = dict() - # Return the video only if it was published in the last 2 hours - if published_at >= (datetime.utcnow() - timedelta(hours=2)).replace( + # Return the video only if it was published in the last `last_n_hours` hours + if published_at >= (datetime.utcnow() - timedelta(hours=last_n_hours)).replace( tzinfo=timezone.utc): video['id'] = playlist_item["snippet"]["resourceId"]["videoId"] video['published_at'] = playlist_item["snippet"]["publishedAt"] video['title'] = playlist_item["snippet"]["title"] yield video + # else: + # return + playlist_items_request = self._api.playlistItems().list_next( playlist_items_request, playlist_items_response ) From 7be464846466000eabdc71360ce191bd60fcc7b5 Mon Sep 17 00:00:00 2001 From: drkostas Date: Fri, 4 Jun 2021 15:04:51 -0400 Subject: [PATCH 07/33] Created get_video_comments() and 
get_profile_pictures(), getting self username automatically #4 --- TODO.md | 8 +- confs/commenter.yml | 2 +- youbot/youtube_utils/youtube_manager.py | 135 +++++++++++++++++++----- 3 files changed, 116 insertions(+), 29 deletions(-) diff --git a/TODO.md b/TODO.md index aabb3cd..f304240 100644 --- a/TODO.md +++ b/TODO.md @@ -1,8 +1,8 @@ # TODO See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Load starter -- [ ] Build a Youtube class +- [X] Get channel name automatically +- [ ] Build Youtube Manager class - [ ] Create child mysql class -- [ ] Customize configurations -- [ ] Rebuild the main -- [ ] Get channel name automatically +- [ ] Create the workflow for the commenter +- [ ] Create the workflow for the accumulator diff --git a/confs/commenter.yml b/confs/commenter.yml index d9c158a..27aff94 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -22,4 +22,4 @@ youtube: client_secret: !ENV ${CLIENT_SECRET} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl - channel: mychannel_name + type: normal diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 3f65ddc..5f68320 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -1,6 +1,7 @@ from typing import List, Tuple, Dict, Union from abc import ABC, abstractmethod import os +import re import math from datetime import datetime, timedelta, timezone import dateutil.parser @@ -20,26 +21,30 @@ class AbstractYoutubeManager(ABC): __slots__ = ('channel_name', '_api', 'tag') @abstractmethod - def __init__(self, config: Dict, channel_name: str, tag: str) -> None: + def __init__(self, config: Dict, tag: str) -> None: """ The basic constructor. 
Creates a new instance of YoutubeManager using the specified credentials :param config: """ - self.channel_name = channel_name self.tag = tag self._api = self._build_api(**config, tag=self.tag) + self.channel_name = self._get_my_username() @staticmethod @abstractmethod def _build_api(*args, **kwargs): pass + @abstractmethod + def _get_my_username(self) -> str: + pass + class YoutubeManagerV3(AbstractYoutubeManager): - def __init__(self, config: Dict, channel_name: str, tag: str): - super().__init__(config, channel_name, tag) + def __init__(self, config: Dict, tag: str): + super().__init__(config, tag) @staticmethod def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str, @@ -69,27 +74,25 @@ def _build_api(client_id: str, client_secret: str, api_version: str, read_only_s api = build('youtube', api_version, http=credentials.authorize(httplib2.Http())) return api - @staticmethod - def _channel_from_response(response: Dict) -> Union[Dict, None]: - """ - Transforms a YouTube API response into a channel Dict. + def _get_my_username(self) -> str: + channels_response = self._api.channels().list( + part="snippet", + fields='items(id,snippet(title))', + mine='true' + ).execute() + if channels_response: + my_username = self._channel_from_response(channels_response)['username'] + else: + error_msg = "Got empty response when trying to get the self username." 
+ logger.error("Got empty response when trying to get the self username.") + raise Exception(error_msg) + return my_username - Args: - response: - """ - - for channel in response['items']: - result = dict() - result['id'] = channel['id'] - result['username'] = channel['snippet']['title'] - result['title'] = None - result['added_on'] = datetime.utcnow().isoformat() - result['last_commented'] = (datetime.utcnow() - timedelta(days=1)).isoformat() - return result - return None + def comment(self, video_id: str, comment_text: str) -> None: + raise NotImplementedError() def get_channel_info_by_username(self, username: str) -> Union[Dict, None]: - """Queries YouTube for a channel using the specified username + """ Queries YouTube for a channel using the specified username. Args: username (str): The username to search for @@ -151,6 +154,90 @@ def get_uploads(self, channels: List, last_n_hours: int = 2) -> Dict: upload['channel_id'] = channel['id'] yield upload + def get_video_comments(self, url: str, search_terms: str = None) -> List: + """ Populates a list with comments (and their replies). 
+ + Args: + url: + search_terms: + """ + + if not search_terms: + search_terms = self.channel_name + video_id = re.search(r"^.*(youtu\.be\/|vi?\/|u\/\w\/|embed\/|\?vi?=|\&vi?=)([^#\&\?]*).*", + url).group(2) + page_token = "" # "&pageToken={}".format(page_token) + comment_threads_response = self._api.commentThreads().list( + part="snippet", + maxResults=100, + videoId="{}{}".format(video_id, page_token), + searchTerms=search_terms + ).execute() + + comments = [] + for comment_thread in comment_threads_response['items']: + channel_name = comment_thread['snippet']['topLevelComment']['snippet']['authorDisplayName'] + if channel_name == self.channel_name: + current_comment = {"url": url, "video_id": video_id, "comment_id": comment_thread['id'], + "like_count": + comment_thread['snippet']['topLevelComment']['snippet'][ + 'likeCount'], + "reply_count": comment_thread['snippet']['totalReplyCount']} + comments.append(current_comment) + + return comments + + def get_profile_pictures(self, channels: List = None) -> List[Tuple[str, str]]: + """ Gets the profile picture urls for a list of channel ids (or for the self channel). + + Args: + channels: + + Returns: + profile_pictures: [(channel_id, thumbnail_url), ..] 
+ """ + + if channels is None: + profile_pictures_request = self._api.channels().list( + mine="true", + part="snippet", + fields='items(id,snippet(thumbnails(default)))' + ) + else: + profile_pictures_request = self._api.channels().list( + id=",".join(channels), + part="snippet", + fields='items(id,snippet(thumbnails(default)))' + ) + + profile_pictures_response = profile_pictures_request.execute() + + profile_pictures_result = [] + for profile_picture in profile_pictures_response["items"]: + profile_pictures_result.append( + (profile_picture["id"], profile_picture["snippet"]["thumbnails"]["default"]["url"])) + + return profile_pictures_result + + @staticmethod + def _channel_from_response(response: Dict) -> Union[Dict, None]: + """ + Transforms a YouTube API response into a channel Dict. + + Args: + response: + """ + + for channel in response['items']: + result = dict() + result['id'] = channel['id'] + result['username'] = channel['snippet']['title'] + result['title'] = None + result['added_on'] = datetime.utcnow().isoformat() + result['last_commented'] = (datetime.utcnow() - timedelta(days=1)).isoformat() + return result + return None + @staticmethod def split_list(input_list: List, chunk_size: int) -> List: """ @@ -203,8 +290,8 @@ def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> video['published_at'] = playlist_item["snippet"]["publishedAt"] video['title'] = playlist_item["snippet"]["title"] yield video - # else: - # return + else: + return playlist_items_request = self._api.playlistItems().list_next( playlist_items_request, playlist_items_response From 41d2d8f69a4a482cdc316320395a0177653d9cd1 Mon Sep 17 00:00:00 2001 From: drkostas Date: Wed, 9 Jun 2021 15:53:16 -0400 Subject: [PATCH 08/33] Finished the YouTube Manager class #4 --- TODO.md | 9 ++- youbot/youtube_utils/youtube_manager.py | 94 ++++++++++++++++++++++--- 2 files changed, 91 insertions(+), 12 deletions(-) diff --git a/TODO.md b/TODO.md index f304240..8a4e4f2 100644 --- 
a/TODO.md +++ b/TODO.md @@ -2,7 +2,12 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Load starter - [X] Get channel name automatically -- [ ] Build Youtube Manager class -- [ ] Create child mysql class +- [X] Build YouTube Manager class +- [ ] Create child MySQL class +- [ ] Roll the comments for each channel - [ ] Create the workflow for the commenter - [ ] Create the workflow for the accumulator +- [ ] Add SQL script for creating the tables needed +- [ ] Recreate the Livestreaming module +- [ ] Improve the youtube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) +- [ ] Use multiple account (different api keys) to check for new commetns diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 5f68320..85cd338 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -1,4 +1,4 @@ -from typing import List, Tuple, Dict, Union +from typing import List, Tuple, Dict, Union, Any from abc import ABC, abstractmethod import os import re @@ -18,7 +18,7 @@ class AbstractYoutubeManager(ABC): - __slots__ = ('channel_name', '_api', 'tag') + __slots__ = ('channel_name', 'channel_id', '_api', 'tag') @abstractmethod def __init__(self, config: Dict, tag: str) -> None: @@ -30,7 +30,7 @@ def __init__(self, config: Dict, tag: str) -> None: self.tag = tag self._api = self._build_api(**config, tag=self.tag) - self.channel_name = self._get_my_username() + self.channel_name, self.channel_id = self._get_my_username_and_id() @staticmethod @abstractmethod @@ -38,7 +38,7 @@ def _build_api(*args, **kwargs): pass @abstractmethod - def _get_my_username(self) -> str: + def _get_my_username_and_id(self) -> str: pass @@ -74,22 +74,32 @@ def _build_api(client_id: str, client_secret: str, api_version: str, read_only_s api = build('youtube', api_version, http=credentials.authorize(httplib2.Http())) return api - def 
_get_my_username(self) -> str: + def _get_my_username_and_id(self) -> Tuple[str, str]: channels_response = self._api.channels().list( part="snippet", fields='items(id,snippet(title))', mine='true' ).execute() if channels_response: - my_username = self._channel_from_response(channels_response)['username'] + channel_info = self._channel_from_response(channels_response) + my_username = channel_info['username'] + my_id = channel_info['id'] else: error_msg = "Got empty response when trying to get the self username." - logger.error("Got empty response when trying to get the self username.") + logger.error(error_msg) raise Exception(error_msg) - return my_username + return my_username, my_id def comment(self, video_id: str, comment_text: str) -> None: - raise NotImplementedError() + + try: + properties = {'snippet.channelId': self.channel_id, + 'snippet.videoId': video_id, + 'snippet.topLevelComment.snippet.textOriginal': comment_text} + self._comment_threads_insert(properties=properties, + part='snippet') + except Exception as exc: + logger.error(f"An error occurred:\n{exc}") def get_channel_info_by_username(self, username: str) -> Union[Dict, None]: """ Queries YouTube for a channel using the specified username. 
@@ -178,7 +188,8 @@ def get_video_comments(self, url: str, search_terms: str = None) -> List: for comment_thread in comment_threads_response['items']: channel_name = comment_thread['snippet']['topLevelComment']['snippet']['authorDisplayName'] if channel_name == self.channel_name: - current_comment = {"url": url, "video_id": video_id, "comment_id": comment_thread['id'], + current_comment = {"url": url, "video_id": video_id, + "comment_id": comment_thread['id'], "like_count": comment_thread['snippet']['topLevelComment']['snippet'][ 'likeCount'], @@ -296,3 +307,66 @@ def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> playlist_items_request = self._api.playlistItems().list_next( playlist_items_request, playlist_items_response ) + + def _comment_threads_insert(self, properties: Dict, **kwargs: Any) -> Dict: + """ Comment using the Youtube API. + Args: + properties: + **kwargs: + """ + + resource = self._build_resource(properties) + kwargs = self._remove_empty_kwargs(**kwargs) + response = self._api.commentThreads().insert(body=resource, **kwargs).execute() + return response + + @staticmethod + def _build_resource(properties: Dict) -> Dict: + """ Build a resource based on a list of properties given as key-value pairs. + Leave properties with empty values out of the inserted resource. """ + + resource = {} + for p in properties: + # Given a key like "snippet.title", split into "snippet" and "title", where + # "snippet" will be an object and "title" will be a property in that object. + prop_array = p.split('.') + ref = resource + for pa in range(0, len(prop_array)): + is_array = False + key = prop_array[pa] + # For properties that have array values, convert a name like + # "snippet.tags[]" to snippet.tags, and set a flag to handle + # the value as an array. + if key[-2:] == '[]': + key = key[0:len(key) - 2:] + is_array = True + if pa == (len(prop_array) - 1): + # Leave properties without values out of inserted resource. 
+ if properties[p]: + if is_array: + ref[key] = properties[p].split(',') + else: + ref[key] = properties[p] + elif key not in ref: + # For example, the property is "snippet.title", but the resource does + # not yet have a "snippet" object. Create the snippet object here. + # Setting "ref = ref[key]" means that in the next time through the + # "for pa in range ..." loop, we will be setting a property in the + # resource's "snippet" object. + ref[key] = {} + ref = ref[key] + else: + # For example, the property is "snippet.description", and the resource + # already has a "snippet" object. + ref = ref[key] + return resource + + @staticmethod + def _remove_empty_kwargs(**kwargs: Any) -> Dict: + """ Remove keyword arguments that are not set. """ + good_kwargs = {} + if kwargs is not None: + for key, value in kwargs.items(): + if value: + good_kwargs[key] = value + return good_kwargs From 616cdfdf3bda2d30ce24c282885efa32d8c37513 Mon Sep 17 00:00:00 2001 From: drkostas Date: Wed, 9 Jun 2021 17:51:13 -0400 Subject: [PATCH 09/33] Started building the YoutubeMysqlDatastore class #4 --- youbot/datastore/__init__.py | 2 +- youbot/datastore/mysql_datastore.py | 57 ++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/youbot/datastore/__init__.py b/youbot/datastore/__init__.py index 4ad4a80..6918bc4 100644 --- a/youbot/datastore/__init__.py +++ b/youbot/datastore/__init__.py @@ -1,6 +1,6 @@ """Cloudstore sub-package of YoutubeCommentBot.""" -from .mysql_datastore import MySqlDatastore +from .mysql_datastore import YoutubeMySqlDatastore __author__ = "drkostas" __email__ = "georgiou.kostas94@gmail.com" diff --git a/youbot/datastore/mysql_datastore.py b/youbot/datastore/mysql_datastore.py index 28865d4..29354d4 100644 --- a/youbot/datastore/mysql_datastore.py +++ b/youbot/datastore/mysql_datastore.py @@ -103,7 +103,8 @@ def insert_into_table(self, table: str, data: dict) -> None: """ data_str = ", ".join( - list(map(lambda key, val: 
"{key}='{val}'".format(key=str(key), val=str(val)), data.keys(), data.values()))) + list(map(lambda key, val: "{key}='{val}'".format(key=str(key), val=str(val)), data.keys(), + data.values()))) query = "INSERT INTO {table} SET {data}".format(table=table, data=data_str) logger.debug("Executing: %s" % query) @@ -122,15 +123,18 @@ def update_table(self, table: str, set_data: dict, where: str) -> None: """ set_data_str = ", ".join( - list(map(lambda key, val: "{key}='{val}'".format(key=str(key), val=str(val)), set_data.keys(), + list(map(lambda key, val: "{key}='{val}'".format(key=str(key), val=str(val)), + set_data.keys(), set_data.values()))) - query = "UPDATE {table} SET {data} WHERE {where}".format(table=table, data=set_data_str, where=where) + query = "UPDATE {table} SET {data} WHERE {where}".format(table=table, data=set_data_str, + where=where) logger.debug("Executing: %s" % query) self._cursor.execute(query) self._connection.commit() - def select_from_table(self, table: str, columns: str = '*', where: str = 'TRUE', order_by: str = 'NULL', + def select_from_table(self, table: str, columns: str = '*', where: str = 'TRUE', + order_by: str = 'NULL', asc_or_desc: str = 'ASC', limit: int = 1000) -> List: """ Selects from a specified table based on the given columns, where, ordering and limit @@ -146,7 +150,8 @@ def select_from_table(self, table: str, columns: str = '*', where: str = 'TRUE', """ query = "SELECT {columns} FROM {table} WHERE {where} ORDER BY {order_by} {asc_or_desc} LIMIT {limit}".format( - columns=columns, table=table, where=where, order_by=order_by, asc_or_desc=asc_or_desc, limit=limit) + columns=columns, table=table, where=where, order_by=order_by, asc_or_desc=asc_or_desc, + limit=limit) logger.debug("Executing: %s" % query) self._cursor.execute(query) results = self._cursor.fetchall() @@ -190,3 +195,45 @@ def __exit__(self) -> None: self._connection.commit() self._cursor.close() + + +class YoutubeMySqlDatastore(MySqlDatastore): + + def __init__(self, 
config: Dict) -> None: + """ + The basic constructor. Creates a new instance of Datastore using the specified credentials + + :param config: + """ + + super().__init__(config) + + def create_empty_tables(self): + channels_schema = \ + """id varchar(100) default '' not null, + username varchar(100) not null, + added_on varchar(100) not null, + last_commented varchar(100) not null, + priority int auto_increment, + channel_photo varchar(100) default '-1' null, + constraint id_pk PRIMARY KEY (id), + constraint id unique (id), + constraint priority unique (priority), + constraint username unique (username)""" + comments_schema = \ + """ + id varchar(100) not null, + link varchar(100) not null, + comment varchar(255) not null, + timestamp varchar(100) not null, + like_count int default -1 null, + reply_count int default -1 null, + comment_id varchar(100) default '-1' null, + video_id varchar(100) default '-1' null, + comment_link varchar(100) default '-1' null, + constraint link_pk PRIMARY KEY (link), + constraint link unique (link), + constraint channel_id foreign key (id) references channels (id) on update cascade on delete cascade""" + + self.create_table(table='channels', schema=channels_schema) + self.create_table(table='comments', schema=comments_schema) From b892c4d1a9e867d8f8b70c5369bfa1da70eddf51 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 20 Jun 2021 14:11:23 -0400 Subject: [PATCH 10/33] Created the basic function of the YoutubeMysqlDatastore class #4 --- youbot/__init__.py | 2 +- youbot/datastore/mysql_datastore.py | 122 +++++++++++++++++++++++++--- 2 files changed, 111 insertions(+), 13 deletions(-) diff --git a/youbot/__init__.py b/youbot/__init__.py index b4223b3..824649a 100644 --- a/youbot/__init__.py +++ b/youbot/__init__.py @@ -3,7 +3,7 @@ from youbot.fancy_logger import ColorizedLogger from youbot.configuration import Configuration, validate_json_schema from youbot.cloudstore import DropboxCloudstore -from youbot.datastore import MySqlDatastore 
+from youbot.datastore import YoutubeMySqlDatastore from youbot.emailer import GmailEmailer from youbot.youtube_utils import YoutubeManagerV3 diff --git a/youbot/datastore/mysql_datastore.py b/youbot/datastore/mysql_datastore.py index 29354d4..c33a5f3 100644 --- a/youbot/datastore/mysql_datastore.py +++ b/youbot/datastore/mysql_datastore.py @@ -1,5 +1,6 @@ from typing import List, Tuple, Dict +from datetime import datetime from mysql import connector as mysql_connector import mysql.connector.cursor @@ -198,6 +199,8 @@ def __exit__(self) -> None: class YoutubeMySqlDatastore(MySqlDatastore): + CHANNEL_TABLE = 'channels' + COMMENTS_TABLE = 'comments' def __init__(self, config: Dict) -> None: """ @@ -207,33 +210,128 @@ def __init__(self, config: Dict) -> None: """ super().__init__(config) + self.create_tables_if_not_exist() - def create_empty_tables(self): + def create_tables_if_not_exist(self): channels_schema = \ - """id varchar(100) default '' not null, + """ + channel_id varchar(100) default '' not null, username varchar(100) not null, added_on varchar(100) not null, last_commented varchar(100) not null, priority int auto_increment, channel_photo varchar(100) default '-1' null, - constraint id_pk PRIMARY KEY (id), - constraint id unique (id), + constraint id_pk PRIMARY KEY (channel_id), + constraint channel_id unique (channel_id), constraint priority unique (priority), constraint username unique (username)""" comments_schema = \ """ - id varchar(100) not null, - link varchar(100) not null, + channel_id varchar(100) not null, + video_link varchar(100) not null, comment varchar(255) not null, - timestamp varchar(100) not null, + comment_time varchar(100) not null, like_count int default -1 null, reply_count int default -1 null, comment_id varchar(100) default '-1' null, video_id varchar(100) default '-1' null, comment_link varchar(100) default '-1' null, - constraint link_pk PRIMARY KEY (link), - constraint link unique (link), - constraint channel_id foreign key (id) 
references channels (id) on update cascade on delete cascade""" + constraint video_link_pk PRIMARY KEY (video_link), + constraint comment_link unique (comment_link), + constraint channel_id foreign key (channel_id) references channels (channel_id) on update cascade on delete cascade""" + + self.create_table(table=self.CHANNEL_TABLE, schema=channels_schema) + self.create_table(table=self.COMMENTS_TABLE, schema=comments_schema) + + def get_channels(self) -> List[Tuple]: + """ Retrieve all channels from the database. """ + + result = self.select_from_table(table=self.CHANNEL_TABLE) + + return result + + def add_channel(self, channel_data: Dict) -> None: + """ Insert the provided channel into the database""" + + try: + self.insert_into_table(table=self.CHANNEL_TABLE, data=channel_data) + except mysql.connector.errors.IntegrityError as e: + logger.error(f"MySQL error: {e}") + + def get_channel_by_id(self, ch_id: str) -> Tuple: + """Retrieve a channel from the database by its ID + + Args: + ch_id (str): The channel ID + """ + + where_statement = f"id='{ch_id}'" + result = self.select_from_table(table=self.CHANNEL_TABLE, where=where_statement) + if len(result) > 1: + logger.warning("Duplicate channel retrieved from SELECT statement:{result}") + elif len(result) == 0: + result.append(()) + + return result[0] + + def get_channel_by_username(self, ch_username: str) -> Tuple: + """Retrieve a channel from the database by its Username + + Args: + ch_username (str): The channel ID + """ + + where_statement = f"username='{ch_username}'" + result = self.select_from_table(table=self.CHANNEL_TABLE, where=where_statement) + if len(result) > 1: + logger.warning("Duplicate channel retrieved from SELECT statement:{result}") + elif len(result) == 0: + result.append(()) + + return result[0] + + def remove_channel_from_id(self, ch_id: str) -> None: + """Retrieve a channel from the database by its ID + + Args: + ch_id (str): The channel ID + """ + + where_statement = f"id='{ch_id}'" + 
self.delete_from_table(table=self.CHANNEL_TABLE, where=where_statement) + + def remove_channel_by_username(self, ch_username: str) -> None: + """Delete a channel from the database by its Username + + Args: + ch_username (str): The channel ID + """ + + where_statement = f"username='{ch_username}'" + self.delete_from_table(table=self.CHANNEL_TABLE, where=where_statement) + + def add_comment(self, ch_id: str, video_link: str, comment_text: str) -> None: + """ TODO: check the case where a comment contains single quotes + Add comment data and update the `last_commented` channel column. + + Args: + ch_id: + video_link: + comment_text: + """ + + datetime_now = datetime.utcnow().isoformat() + comments_data = {'channel_id': ch_id, + 'video_link': video_link, + 'comment': comment_text, + 'comment_time': datetime_now} + update_data = {'last_commented': datetime_now} + where_statement = f"channel_id='{ch_id}'" + + try: + self.insert_into_table(self.COMMENTS_TABLE, data=comments_data) + # Update Channel's last_commented timestamp + self.update_table(table=self.CHANNEL_TABLE, set_data=update_data, where=where_statement) + except mysql.connector.errors.IntegrityError as e: + logger.error(f"MySQL Error: {e}") - self.create_table(table='channels', schema=channels_schema) - self.create_table(table='comments', schema=comments_schema) From e160720ef46e68ceba25b38847b68f356e653d1e Mon Sep 17 00:00:00 2001 From: drkostas Date: Wed, 23 Jun 2021 17:54:22 -0400 Subject: [PATCH 11/33] Finished the YoutubeMysqlDatastore class #4 --- TODO.md | 7 +- youbot/datastore/mysql_datastore.py | 230 +++++++++++++++++++++++----- 2 files changed, 193 insertions(+), 44 deletions(-) diff --git a/TODO.md b/TODO.md index 8a4e4f2..f2de586 100644 --- a/TODO.md +++ b/TODO.md @@ -3,11 +3,12 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. 
- [X] Load starter - [X] Get channel name automatically - [X] Build YouTube Manager class -- [ ] Create child MySQL class +- [X] Create child MySQL class +- [ ] Integrate YoutubeMysql class into the YoutubeManager class - [ ] Roll the comments for each channel - [ ] Create the workflow for the commenter - [ ] Create the workflow for the accumulator - [ ] Add SQL script for creating the tables needed - [ ] Recreate the Livestreaming module -- [ ] Improve the youtube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) -- [ ] Use multiple account (different api keys) to check for new commetns +- [ ] Improve the YouTube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) +- [ ] Use multiple account (different api keys) to check for new comments diff --git a/youbot/datastore/mysql_datastore.py b/youbot/datastore/mysql_datastore.py index c33a5f3..f23dc89 100644 --- a/youbot/datastore/mysql_datastore.py +++ b/youbot/datastore/mysql_datastore.py @@ -50,6 +50,30 @@ def get_connection(username: str, password: str, hostname: str, db_name: str, po cursor = connection.cursor() return connection, cursor + def execute_query(self, query: str, commit: bool = False, + fetchall: bool = False, fetchone: bool = False) -> List[Tuple]: + """ + Execute a query in the DB. 
+ Args: + query: + commit: + fetchall: + fetchone: + """ + + logger.debug("Executing: %s" % query) + try: + self._cursor.execute(query) + if commit: + self.commit() + if fetchall: + return self._cursor.fetchall() + if fetchone: + return self._cursor.fetchone() + except mysql.connector.errors.ProgrammingError as e: + logger.error(f'MySQL Error: {e}') + logger.error(f'Full Query: {query}') + def create_table(self, table: str, schema: str) -> None: """ Creates a table using the specified schema @@ -61,9 +85,7 @@ def create_table(self, table: str, schema: str) -> None: """ query = "CREATE TABLE IF NOT EXISTS {table} ({schema})".format(table=table, schema=schema) - logger.debug("Executing: %s" % query) - self._cursor.execute(query) - self._connection.commit() + self.execute_query(query, commit=True) def drop_table(self, table: str) -> None: """ @@ -75,9 +97,7 @@ def drop_table(self, table: str) -> None: """ query = "DROP TABLE IF EXISTS {table}".format(table=table) - logger.debug("Executing: %s" % query) - self._cursor.execute(query) - self._connection.commit() + self.execute_query(query, commit=True) def truncate_table(self, table: str) -> None: """ @@ -89,9 +109,7 @@ def truncate_table(self, table: str) -> None: """ query = "TRUNCATE TABLE {table}".format(table=table) - logger.debug("Executing: %s" % query) - self._cursor.execute(query) - self._connection.commit() + self.execute_query(query, commit=True) def insert_into_table(self, table: str, data: dict) -> None: """ @@ -108,9 +126,7 @@ def insert_into_table(self, table: str, data: dict) -> None: data.values()))) query = "INSERT INTO {table} SET {data}".format(table=table, data=data_str) - logger.debug("Executing: %s" % query) - self._cursor.execute(query) - self._connection.commit() + self.execute_query(query, commit=True) def update_table(self, table: str, set_data: dict, where: str) -> None: """ @@ -130,32 +146,98 @@ def update_table(self, table: str, set_data: dict, where: str) -> None: query = "UPDATE {table} 
SET {data} WHERE {where}".format(table=table, data=set_data_str, where=where) - logger.debug("Executing: %s" % query) - self._cursor.execute(query) - self._connection.commit() + self.execute_query(query, commit=True) def select_from_table(self, table: str, columns: str = '*', where: str = 'TRUE', - order_by: str = 'NULL', - asc_or_desc: str = 'ASC', limit: int = 1000) -> List: + order_by: str = 'NULL', asc_or_desc: str = 'ASC', limit: int = 1000, + group_by: str = '', having: str = '') -> List[Tuple]: """ Selects from a specified table based on the given columns, where, ordering and limit - :param self: - :param table: - :param columns: - :param where: - :param order_by: - :param asc_or_desc: - :param limit: - :return results: + Args: + table: + columns: + where: + order_by: + asc_or_desc: + limit: + group_by: + having: """ - query = "SELECT {columns} FROM {table} WHERE {where} ORDER BY {order_by} {asc_or_desc} LIMIT {limit}".format( - columns=columns, table=table, where=where, order_by=order_by, asc_or_desc=asc_or_desc, - limit=limit) - logger.debug("Executing: %s" % query) - self._cursor.execute(query) - results = self._cursor.fetchall() + # Construct Group By + if group_by: + if having: + having = f'HAVING {having}' + group_by = f'GROUP BY {group_by} {having} ' + + # Build the Query + query = f"SELECT {columns} " \ + f"FROM {table} " \ + f"WHERE {where} " \ + f"{group_by}" \ + f"ORDER BY {order_by} {asc_or_desc} " \ + f"LIMIT {limit}" + + results = self.execute_query(query, fetchall=True) + + return results + + def select_join(self, left_table: str, right_table: str, + join_key_left: str, join_key_right: str, + left_columns: str = '', right_columns: str = '', custom_columns: str = '', + join_type: str = 'INNER', + where: str = 'TRUE', order_by: str = 'NULL', asc_or_desc: str = 'ASC', + limit: int = 1000, group_by: str = '', having: str = '') -> List[Tuple]: + """ + Join two tables and select. 
+ + Args: + left_table: + right_table: + left_columns: + right_columns: + custom_columns: Custom columns for which no `l.` or `r.` will be added automatically + join_key_left: The column of join of the left table + join_key_right: The column of join of the right table + join_type: OneOf(INNER, LEFT, RIGHT) + where: Add a `l.` or `.r` before the specified columns + order_by: Add a `l.` or `.r` before the specified columns + asc_or_desc: + limit: + group_by: Add a `l.` or `.r` before the specified columns + having: Add a `l.` or `.r` before the specified columns + """ + + # Construct Group By + if group_by: + if having: + having = f'HAVING {having}' + group_by = f'GROUP BY {group_by} {having} ' + + # Construct Columns + if left_columns: + left_columns = 'l.' + ', l.'.join(map(str.strip, left_columns.split(','))) + if right_columns or custom_columns: + left_columns += ', ' + if right_columns: + right_columns = 'r.' + ', r.'.join(map(str.strip, right_columns.split(','))) + if custom_columns: + right_columns += ', ' + columns = f'{left_columns} {right_columns} {custom_columns}' + + # Build the Query + query = f"SELECT {columns} " \ + f"FROM {left_table} l " \ + f"{join_type} JOIN {right_table} r " \ + f"ON l.{join_key_left}=r.{join_key_right} " \ + f"WHERE {where} " \ + f"{group_by}" \ + f"ORDER BY {order_by} {asc_or_desc} " \ + f"LIMIT {limit}" + + print(query) + results = self.execute_query(query, fetchall=True) return results @@ -170,9 +252,7 @@ def delete_from_table(self, table: str, where: str) -> None: """ query = "DELETE FROM {table} WHERE {where}".format(table=table, where=where) - logger.debug("Executing: %s" % query) - self._cursor.execute(query) - self._connection.commit() + self.execute_query(query, commit=True) def show_tables(self) -> List: """ @@ -181,22 +261,25 @@ def show_tables(self) -> List: """ query = 'SHOW TABLES' - logger.debug("Executing: %s" % query) - self._cursor.execute(query) - results = self._cursor.fetchall() + results = 
self.execute_query(query, fetchall=True) return [result[0] for result in results] - def __exit__(self) -> None: + def commit(self) -> None: + self._connection.commit() + + def close_connection(self) -> None: """ Flushes and closes the connection :return: """ - self._connection.commit() + self.commit() self._cursor.close() + __exit__ = close_connection + class YoutubeMySqlDatastore(MySqlDatastore): CHANNEL_TABLE = 'channels' @@ -237,7 +320,7 @@ def create_tables_if_not_exist(self): video_id varchar(100) default '-1' null, comment_link varchar(100) default '-1' null, constraint video_link_pk PRIMARY KEY (video_link), - constraint comment_link unique (comment_link), + constraint video_link unique (video_link), constraint channel_id foreign key (channel_id) references channels (channel_id) on update cascade on delete cascade""" self.create_table(table=self.CHANNEL_TABLE, schema=channels_schema) @@ -290,7 +373,7 @@ def get_channel_by_username(self, ch_username: str) -> Tuple: return result[0] - def remove_channel_from_id(self, ch_id: str) -> None: + def remove_channel_by_id(self, ch_id: str) -> None: """Retrieve a channel from the database by its ID Args: @@ -310,6 +393,19 @@ def remove_channel_by_username(self, ch_username: str) -> None: where_statement = f"username='{ch_username}'" self.delete_from_table(table=self.CHANNEL_TABLE, where=where_statement) + def update_channel_photo(self, channel_id: str, photo_url: str) -> None: + """ + Update the profile picture link of a channel. + Args: + channel_id: + photo_url: + """ + + set_data = {'channel_photo': photo_url} + self.update_table(table=self.CHANNEL_TABLE, + set_data=set_data, + where=f"channel_id='{channel_id}'") + def add_comment(self, ch_id: str, video_link: str, comment_text: str) -> None: """ TODO: check the case where a comment contains single quotes Add comment data and update the `last_commented` channel column. 
@@ -335,3 +431,55 @@ def add_comment(self, ch_id: str, video_link: str, comment_text: str) -> None: except mysql.connector.errors.IntegrityError as e: logger.error(f"MySQL Error: {e}") + def get_comments(self, n_recent: int, min_likes: int = -1, + min_replies: int = -1) -> List[Tuple]: + """ + Get the latest n_recent comments from the comments table. + Args: + n_recent: + min_likes: + min_replies: + """ + + comment_cols = 'video_link, comment, comment_time, like_count, reply_count, comment_link' + channel_cols = 'username, channel_photo' + where = f'l.like_count>={min_likes} AND l.reply_count>={min_replies} ' + for comment in self.select_join(left_table=self.COMMENTS_TABLE, + right_table=self.CHANNEL_TABLE, + left_columns=comment_cols, + right_columns=channel_cols, + custom_columns='COUNT(comment) as cnt', + join_key_left='channel_id', + join_key_right='channel_id', + where=where, + order_by='l.comment_time', + asc_or_desc='desc', + limit=n_recent): + yield comment + + def update_comment(self, video_link: str, comment_id: str, + like_cnt: int, reply_cnt: int) -> None: + """ + Populate a comment entry with additional information. 
+ Args: + video_link: + comment_id: + like_cnt: + reply_cnt: + """ + + # Get video id + video_id = video_link.split('v=')[1].split('&')[0] + # Create Comment Link + comment_link = f'https://youtube.com/watch?v={video_id}&lc={comment_id}' + # Construct the update key-values + set_data = {'comment_link': comment_link, + 'video_id': video_id, + 'comment_id': comment_id, + 'like_count': like_cnt, + 'reply_count': reply_cnt} + # Execute the update command + self.update_table(table=self.COMMENTS_TABLE, + set_data=set_data, + where=f"video_link='{video_link}'") + From 0221369bb3b7fae0e2fca9fbd867276eba2429d6 Mon Sep 17 00:00:00 2001 From: drkostas Date: Thu, 1 Jul 2021 21:39:43 -0400 Subject: [PATCH 12/33] Started the high-level functions #4 - Renamed YoutubeManagerV3 to YoutubeApiV3 - child class YoutubeManager that used the mysql class and contains the high-level functions - Created the add_channel and remove_channel functions and integrated the current functionality into the main.py --- TODO.md | 6 +- youbot/__init__.py | 2 +- youbot/datastore/mysql_datastore.py | 12 +- youbot/main.py | 69 +++-- youbot/youtube_utils/__init__.py | 2 +- youbot/youtube_utils/youtube_api.py | 372 +++++++++++++++++++++++ youbot/youtube_utils/youtube_manager.py | 387 ++---------------------- 7 files changed, 467 insertions(+), 383 deletions(-) create mode 100644 youbot/youtube_utils/youtube_api.py diff --git a/TODO.md b/TODO.md index f2de586..da8a276 100644 --- a/TODO.md +++ b/TODO.md @@ -3,10 +3,10 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. 
- [X] Load starter - [X] Get channel name automatically - [X] Build YouTube Manager class -- [X] Create child MySQL class -- [ ] Integrate YoutubeMysql class into the YoutubeManager class -- [ ] Roll the comments for each channel +- [X] Create child MySQL class +- [X] Integrate YoutubeMysql class into the YoutubeManager class - [ ] Create the workflow for the commenter +- [ ] Roll the comments for each channel - [ ] Create the workflow for the accumulator - [ ] Add SQL script for creating the tables needed - [ ] Recreate the Livestreaming module diff --git a/youbot/__init__.py b/youbot/__init__.py index 824649a..e536fea 100644 --- a/youbot/__init__.py +++ b/youbot/__init__.py @@ -5,7 +5,7 @@ from youbot.cloudstore import DropboxCloudstore from youbot.datastore import YoutubeMySqlDatastore from youbot.emailer import GmailEmailer -from youbot.youtube_utils import YoutubeManagerV3 +from youbot.youtube_utils import YoutubeManager, YoutubeApiV3 __author__ = "drkostas" __email__ = "georgiou.kostas94@gmail.com" diff --git a/youbot/datastore/mysql_datastore.py b/youbot/datastore/mysql_datastore.py index f23dc89..4cc6cb2 100644 --- a/youbot/datastore/mysql_datastore.py +++ b/youbot/datastore/mysql_datastore.py @@ -111,21 +111,25 @@ def truncate_table(self, table: str) -> None: query = "TRUNCATE TABLE {table}".format(table=table) self.execute_query(query, commit=True) - def insert_into_table(self, table: str, data: dict) -> None: + def insert_into_table(self, table: str, data: dict, if_not_exists: bool = False) -> None: """ Inserts into the specified table a row based on a column_name: value dictionary :param self: :param table: :param data: + :param if_not_exists: :return: """ data_str = ", ".join( list(map(lambda key, val: "{key}='{val}'".format(key=str(key), val=str(val)), data.keys(), data.values()))) - - query = "INSERT INTO {table} SET {data}".format(table=table, data=data_str) + if if_not_exists: + ignore = 'IGNORE' + else: + ignore = '' + query = f"INSERT {ignore} 
INTO {table} SET {data_str}" self.execute_query(query, commit=True) def update_table(self, table: str, set_data: dict, where: str) -> None: @@ -337,7 +341,7 @@ def add_channel(self, channel_data: Dict) -> None: """ Insert the provided channel into the database""" try: - self.insert_into_table(table=self.CHANNEL_TABLE, data=channel_data) + self.insert_into_table(table=self.CHANNEL_TABLE, data=channel_data, if_not_exists=True) except mysql.connector.errors.IntegrityError as e: logger.error(f"MySQL error: {e}") diff --git a/youbot/main.py b/youbot/main.py index a639369..902567c 100644 --- a/youbot/main.py +++ b/youbot/main.py @@ -1,8 +1,7 @@ import traceback import argparse -from youbot import Configuration, ColorizedLogger, \ - DropboxCloudstore, MySqlDatastore, GmailEmailer, YoutubeManagerV3 +from youbot import Configuration, ColorizedLogger, YoutubeManager logger = ColorizedLogger(logger_name='Main', color='yellow') @@ -13,6 +12,7 @@ def get_args() -> argparse.Namespace: Returns: argparse.Namespace: """ + parser = argparse.ArgumentParser( description='A template for python projects.', add_help=False) @@ -27,14 +27,54 @@ def get_args() -> argparse.Namespace: required_args.add_argument('-l', '--log', required=True, help="Name of the output log file") # Optional args optional_args = parser.add_argument_group('Optional Arguments') - optional_args.add_argument('-m', '--run-mode', choices=['run_mode_1', 'run_mode_2', 'run_mode_3'], - default='run_mode_1', + commands = ['commenter', 'accumulator', + 'add_channel', 'remove_channel', 'list_channels', 'list_comments', + 'refresh_photos'] + optional_args.add_argument('-m', '--run-mode', choices=commands, + default=commands[0], help='Description of the run modes') + optional_args.add_argument('-i', '--id', help="The ID of the YouTube Channel") + optional_args.add_argument('-u', '--username', + help="The Username of the YouTube Channel") optional_args.add_argument('-d', '--debug', action='store_true', help='Enables the debug 
log messages') optional_args.add_argument("-h", "--help", action="help", help="Show this help message and exit") - return parser.parse_args() + args = parser.parse_args() + # Custom Condition Checking + if (args.id is None and args.username is None) and \ + args.run_mode in ['add_channel', 'remove_channel']: + parser.error('You need to pass either --id or --username when selecting ' + 'the `add_channel` and `remove_channel` actions') + return args + + +def commenter(youtube: YoutubeManager, args: argparse.Namespace) -> None: + raise NotImplementedError() + + +def accumulator(youtube: YoutubeManager, args: argparse.Namespace) -> None: + raise NotImplementedError() + + +def add_channel(youtube: YoutubeManager, args: argparse.Namespace) -> None: + youtube.add_channel(channel_id=args.id, username=args.username) + + +def remove_channel(youtube: YoutubeManager, args: argparse.Namespace) -> None: + youtube.remove_channel(channel_id=args.id, username=args.username) + + +def list_channels(youtube: YoutubeManager, args: argparse.Namespace) -> None: + raise NotImplementedError() + + +def list_comments(youtube: YoutubeManager, args: argparse.Namespace) -> None: + raise NotImplementedError() + + +def refresh_photos(youtube: YoutubeManager, args: argparse.Namespace) -> None: + raise NotImplementedError() def main(): @@ -47,21 +87,16 @@ def main(): # Initializing args = get_args() ColorizedLogger.setup_logger(log_path=args.log, debug=args.debug, clear_log=True) - # Load the configuration + # Load the configurations conf_obj = Configuration(config_src=args.config_file) you_conf = conf_obj.get_config('youtube')[0] + db_conf = conf_obj.get_config('datastore')[0] # Setup Youtube API - yout_manager = YoutubeManagerV3(config=you_conf['config'], - channel_name=you_conf['channel'], - tag=conf_obj.tag) - - # Test the video retrieval for 3 channels - pewd_info = yout_manager.get_channel_info_by_username('Pewdiepie') - v_info = yout_manager.get_channel_info_by_username('Veritasium') - 
ku_info = yout_manager.get_channel_info_by_username('Kurzgesagt') - channel_ids = [pewd_info['id'], v_info['id'], ku_info['id']] - for video in yout_manager.get_uploads(channels=channel_ids, last_n_hours=12000): - logger.info(video) + youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, + tag=conf_obj.tag) + # Run in the specified run mode + func = globals()[args.run_mode] + func(youtube, args) if __name__ == '__main__': diff --git a/youbot/youtube_utils/__init__.py b/youbot/youtube_utils/__init__.py index e40960d..a0b1704 100644 --- a/youbot/youtube_utils/__init__.py +++ b/youbot/youtube_utils/__init__.py @@ -1,6 +1,6 @@ """Youtube Utils sub-package of YoutubeCommentBot.""" -from .youtube_manager import YoutubeManagerV3 +from .youtube_manager import YoutubeApiV3, YoutubeManager __author__ = "drkostas" __email__ = "georgiou.kostas94@gmail.com" diff --git a/youbot/youtube_utils/youtube_api.py b/youbot/youtube_utils/youtube_api.py new file mode 100644 index 0000000..a3e5002 --- /dev/null +++ b/youbot/youtube_utils/youtube_api.py @@ -0,0 +1,372 @@ +from typing import List, Tuple, Dict, Union, Any +from abc import ABC, abstractmethod +import os +import re +import math +from datetime import datetime, timedelta, timezone +import dateutil.parser +from oauth2client.file import Storage +from oauth2client.tools import argparser, run_flow +from oauth2client.client import OAuth2WebServerFlow +import googleapiclient +from googleapiclient.discovery import build +import httplib2 + +from youbot import ColorizedLogger + +logger = ColorizedLogger('YoutubeApi') + + +class AbstractYoutubeApi(ABC): + __slots__ = ('channel_name', 'channel_id', '_api', 'tag') + + @abstractmethod + def __init__(self, config: Dict, tag: str) -> None: + """ + The basic constructor. 
Creates a new instance of YoutubeManager using the specified credentials + + :param config: + """ + + self.tag = tag + self._api = self._build_api(**config, tag=self.tag) + self.channel_name, self.channel_id = self._get_my_username_and_id() + + @staticmethod + @abstractmethod + def _build_api(*args, **kwargs): + pass + + @abstractmethod + def _get_my_username_and_id(self) -> str: + pass + + +class YoutubeApiV3(AbstractYoutubeApi): + + def __init__(self, config: Dict, tag: str): + super().__init__(config, tag) + + @staticmethod + def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str, + tag: str) -> googleapiclient.discovery.Resource: + """ + Build a youtube api connection. + + Args: + client_id: + client_secret: + api_version: + read_only_scope: + tag: + """ + + flow = OAuth2WebServerFlow(client_id=client_id, + client_secret=client_secret, + scope=read_only_scope) + key_path = os.path.join('..', 'keys', f'{tag}.json') + storage = Storage(key_path) + credentials = storage.get() + + if credentials is None or credentials.invalid: + flags = argparser.parse_args(args=['--noauth_local_webserver']) + credentials = run_flow(flow, storage, flags) + + api = build('youtube', api_version, http=credentials.authorize(httplib2.Http())) + return api + + def _get_my_username_and_id(self) -> Tuple[str, str]: + channels_response = self._api.channels().list( + part="snippet", + fields='items(id,snippet(title))', + mine='true' + ).execute() + if channels_response: + channel_info = self._yt_to_channel_dict(channels_response) + my_username = channel_info['username'] + my_id = channel_info['channel_id'] + else: + error_msg = "Got empty response when trying to get the self username." 
+ logger.error(error_msg) + raise Exception(error_msg) + return my_username, my_id + + def comment(self, video_id: str, comment_text: str) -> None: + + try: + properties = {'snippet.channelId': self.channel_id, + 'snippet.videoId': video_id, + 'snippet.topLevelComment.snippet.textOriginal': comment_text} + self._comment_threads_insert(properties=properties, + part='snippet') + except Exception as exc: + logger.error(f"An error occurred:\n{exc}") + + def get_channel_info_by_username(self, username: str) -> Union[Dict, None]: + """ Queries YouTube for a channel using the specified username. + + Args: + username (str): The username to search for + """ + + channels_response = self._api.channels().list( + forUsername=username, + part="snippet", + fields='items(id,snippet(title))' + ).execute() + if channels_response: + channel = self._yt_to_channel_dict(channels_response) + if channel is not None: + channel['username'] = username + else: + logger.warning(f"Got empty response for channel username: {username}") + channel = {} + return channel + + def get_channel_info_by_id(self, channel_id: str) -> Union[Dict, None]: + """ Queries YouTube for a channel using the specified channel id. + + Args: + channel_id (str): The channel ID to search for + """ + + channels_response = self._api.channels().list( + id=channel_id, + part="snippet", + fields='items(id,snippet(title))' + ).execute() + + return self._yt_to_channel_dict(channels_response) + + def get_uploads(self, channels: List, last_n_hours: int = 2) -> Dict: + """ Retrieves new uploads for the specified channels. 
+ + Args: + channels(list): A list with channel IDs + last_n_hours: + """ + + # Separate the channels list in 50-sized channel lists + channels_lists = self.split_list(channels, 50) + channels_to_check = [] + # Get the Playlist IDs of each channel + for channels in channels_lists: + channels_response = self._api.channels().list( + id=",".join(channels), + part="contentDetails,snippet", + fields="items(id,contentDetails(relatedPlaylists(uploads)),snippet(title))" + ).execute() + channels_to_check.extend(channels_response["items"]) + # For each playlist ID, get 50 videos + for channel in channels_to_check: + uploads_list_id = channel["contentDetails"]["relatedPlaylists"]["uploads"] + for upload in self._get_uploads_playlist(uploads_list_id, last_n_hours): + upload['channel_title'] = channel['snippet']['title'] + upload['channel_id'] = channel['id'] + yield upload + + def get_video_comments(self, url: str, search_terms: str = None) -> List: + """ Populates a list with comments (and their replies). 
+ + Args: + url: + search_terms: + """ + + if not search_terms: + search_terms = self.channel_name + video_id = re.search(r"^.*(youtu\.be\/|vi?\/|u\/\w\/|embed\/|\?vi?=|\&vi?=)([^#\&\?]*).*", + url).group(2) + page_token = "" # "&pageToken={}".format(page_token) + comment_threads_response = self._api.commentThreads().list( + part="snippet", + maxResults=100, + videoId="{}{}".format(video_id, page_token), + searchTerms=search_terms + ).execute() + + comments = [] + for comment_thread in comment_threads_response['items']: + channel_name = comment_thread['snippet']['topLevelComment']['snippet']['authorDisplayName'] + if channel_name == self.channel_name: + current_comment = {"url": url, "video_id": video_id, + "comment_id": comment_thread['id'], + "like_count": + comment_thread['snippet']['topLevelComment']['snippet'][ + 'likeCount'], + "reply_count": comment_thread['snippet']['totalReplyCount']} + comments.append(current_comment) + + return comments + + def get_profile_pictures(self, channels: List = None) -> List[Tuple[str, str]]: + """ Gets the profile picture urls for a list of channel ids (or for the self channel). + + Args: + channels: + + Returns: + profile_pictures: [(channel_id, thumbnail_url), ..] 
+ """ + + if channels is None: + profile_pictures_request = self._api.channels().list( + mine="true", + part="snippet", + fields='items(id,snippet(thumbnails(default)))' + ) + else: + profile_pictures_request = self._api.channels().list( + id=",".join(channels), + part="snippet", + fields='items(id,snippet(thumbnails(default)))' + ) + + profile_pictures_response = profile_pictures_request.execute() + + profile_pictures_result = [] + for profile_picture in profile_pictures_response["items"]: + profile_pictures_result.append( + (profile_picture["id"], profile_picture["snippet"]["thumbnails"]["default"]["url"])) + + return profile_pictures_result + + @staticmethod + def _yt_to_channel_dict(response: Dict) -> Union[Dict, None]: + """ + Transforms a YouTube API response into a channel Dict. + + Args: + response: + """ + + for channel in response['items']: + result = dict() + result['channel_id'] = channel['id'] + result['username'] = channel['snippet']['title'] + result['added_on'] = datetime.utcnow().isoformat() + result['last_commented'] = (datetime.utcnow() - timedelta(days=1)).isoformat() + return result + return None + + @staticmethod + def split_list(input_list: List, chunk_size: int) -> List: + """ + Split a list into `chunk_size` sub-lists. + + Args: + input_list: + chunk_size: + """ + + chunks = math.ceil(len(input_list) / chunk_size) + if chunks == 1: + output_list = [input_list] + else: + output_list = [] + end = 0 + for i in range(chunks - 1): + start = i * chunk_size + end = (i + 1) * chunk_size + output_list.append(input_list[start:end]) + output_list.append(input_list[end:]) + + return output_list + + def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> Dict: + """ Retrieves uploads using the specified playlist ID which were have been added + since the last check. 
+ + Args: + uploads_list_id (str): The ID of the uploads playlist + """ + + # Construct the request + playlist_items_request = self._api.playlistItems().list( + playlistId=uploads_list_id, + part="snippet", + fields='items(id,snippet(title,publishedAt,resourceId(videoId)))', + maxResults=50 + ) + + while playlist_items_request: + playlist_items_response = playlist_items_request.execute() + for playlist_item in playlist_items_response["items"]: + published_at = dateutil.parser.parse(playlist_item['snippet']['publishedAt']) + video = dict() + # Return the video only if it was published in the last `last_n_hours` hours + if published_at >= (datetime.utcnow() - timedelta(hours=last_n_hours)).replace( + tzinfo=timezone.utc): + video['id'] = playlist_item["snippet"]["resourceId"]["videoId"] + video['published_at'] = playlist_item["snippet"]["publishedAt"] + video['title'] = playlist_item["snippet"]["title"] + yield video + else: + return + + playlist_items_request = self._api.playlistItems().list_next( + playlist_items_request, playlist_items_response + ) + + def _comment_threads_insert(self, properties: Dict, **kwargs: Any) -> Dict: + """ Comment using the Youtube API. + Args: + properties: + **kwargs: + """ + + resource = self._build_resource(properties) + kwargs = self._remove_empty_kwargs(**kwargs) + response = self._api.commentThreads().insert(body=resource, **kwargs).execute() + return response + + @staticmethod + def _build_resource(properties: Dict) -> Dict: + """ Build a resource based on a list of properties given as key-value pairs. + Leave properties with empty values out of the inserted resource. """ + + resource = {} + for p in properties: + # Given a key like "snippet.title", split into "snippet" and "title", where + # "snippet" will be an object and "title" will be a property in that object. 
+ prop_array = p.split('.') + ref = resource + for pa in range(0, len(prop_array)): + is_array = False + key = prop_array[pa] + # For properties that have array values, convert a name like + # "snippet.tags[]" to snippet.tags, and set a flag to handle + # the value as an array. + if key[-2:] == '[]': + key = key[0:len(key) - 2:] + is_array = True + if pa == (len(prop_array) - 1): + # Leave properties without values out of inserted resource. + if properties[p]: + if is_array: + ref[key] = properties[p].split(',') + else: + ref[key] = properties[p] + elif key not in ref: + # For example, the property is "snippet.title", but the resource does + # not yet have a "snippet" object. Create the snippet object here. + # Setting "ref = ref[key]" means that in the next time through the + # "for pa in range ..." loop, we will be setting a property in the + # resource's "snippet" object. + ref[key] = {} + ref = ref[key] + else: + # For example, the property is "snippet.description", and the resource + # already has a "snippet" object. + ref = ref[key] + return resource + + @staticmethod + def _remove_empty_kwargs(**kwargs: Any) -> Dict: + """ Remove keyword arguments that are not set. 
""" + good_kwargs = {} + if kwargs is not None: + for key, value in kwargs.items(): + if value: + good_kwargs[key] = value + return good_kwargs diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 85cd338..0234527 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -1,372 +1,45 @@ from typing import List, Tuple, Dict, Union, Any -from abc import ABC, abstractmethod -import os -import re -import math from datetime import datetime, timedelta, timezone -import dateutil.parser -from oauth2client.file import Storage -from oauth2client.tools import argparser, run_flow -from oauth2client.client import OAuth2WebServerFlow -import googleapiclient -from googleapiclient.discovery import build -import httplib2 -from youbot import ColorizedLogger +from youbot import ColorizedLogger, YoutubeMySqlDatastore +from .youtube_api import YoutubeApiV3 logger = ColorizedLogger('YoutubeManager') -class AbstractYoutubeManager(ABC): - __slots__ = ('channel_name', 'channel_id', '_api', 'tag') +class YoutubeManager(YoutubeApiV3): + __slots__ = ('db',) - @abstractmethod - def __init__(self, config: Dict, tag: str) -> None: - """ - The basic constructor. 
Creates a new instance of YoutubeManager using the specified credentials - - :param config: - """ - - self.tag = tag - self._api = self._build_api(**config, tag=self.tag) - self.channel_name, self.channel_id = self._get_my_username_and_id() - - @staticmethod - @abstractmethod - def _build_api(*args, **kwargs): - pass - - @abstractmethod - def _get_my_username_and_id(self) -> str: - pass - - -class YoutubeManagerV3(AbstractYoutubeManager): - def __init__(self, config: Dict, tag: str): + def __init__(self, config: Dict, db_conf: Dict, tag: str): + self.db = YoutubeMySqlDatastore(config=db_conf['config']) super().__init__(config, tag) - @staticmethod - def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str, - tag: str) -> googleapiclient.discovery.Resource: - """ - Build a youtube api connection. - - Args: - client_id: - client_secret: - api_version: - read_only_scope: - tag: - """ - - flow = OAuth2WebServerFlow(client_id=client_id, - client_secret=client_secret, - scope=read_only_scope) - key_path = os.path.join('..', 'keys', f'{tag}.json') - storage = Storage(key_path) - credentials = storage.get() - - if credentials is None or credentials.invalid: - flags = argparser.parse_args(args=['--noauth_local_webserver']) - credentials = run_flow(flow, storage, flags) - - api = build('youtube', api_version, http=credentials.authorize(httplib2.Http())) - return api - - def _get_my_username_and_id(self) -> Tuple[str, str]: - channels_response = self._api.channels().list( - part="snippet", - fields='items(id,snippet(title))', - mine='true' - ).execute() - if channels_response: - channel_info = self._channel_from_response(channels_response) - my_username = channel_info['username'] - my_id = channel_info['id'] + def add_channel(self, channel_id: str = None, username: str = None) -> None: + if channel_id: + channel_info = self.get_channel_info_by_id(channel_id) + elif username: + channel_info = self.get_channel_info_by_username(username) else: - 
error_msg = "Got empty response when trying to get the self username." - logger.error(error_msg) - raise Exception(error_msg) - return my_username, my_id - - def comment(self, video_id: str, comment_text: str) -> None: - - try: - properties = {'snippet.channelId': self.channel_id, - 'snippet.videoId': video_id, - 'snippet.topLevelComment.snippet.textOriginal': comment_text} - self._comment_threads_insert(properties=properties, - part='snippet') - except Exception as exc: - logger.error(f"An error occurred:\n{exc}") - - def get_channel_info_by_username(self, username: str) -> Union[Dict, None]: - """ Queries YouTube for a channel using the specified username. - - Args: - username (str): The username to search for - """ - - channels_response = self._api.channels().list( - forUsername=username, - part="snippet", - fields='items(id,snippet(title))' - ).execute() - if channels_response: - channel = self._channel_from_response(channels_response) - if channel is not None: - channel['username'] = username + raise YoutubeManagerError("You should either pass channel id or username " + "to add channel!") + if channel_info: + self.db.add_channel(channel_data=channel_info) + logger.info(f"Channel `{channel_info['username']}` successfully added!") else: - logger.warning(f"Got empty response for channel username: {username}") - channel = {} - return channel - - def get_channel_info_by_id(self, channel_id: str) -> Union[Dict, None]: - """ Queries YouTube for a channel using the specified channel id. - - Args: - channel_id (str): The channel ID to search for - """ - - channels_response = self._api.channels().list( - id=channel_id, - part="snippet", - fields='items(id,snippet(title))' - ).execute() - - return self._channel_from_response(channels_response) - - def get_uploads(self, channels: List, last_n_hours: int = 2) -> Dict: - """ Retrieves new uploads for the specified channels. 
- - Args: - channels(list): A list with channel IDs - last_n_hours: - """ - - # Separate the channels list in 50-sized channel lists - channels_lists = self.split_list(channels, 50) - channels_to_check = [] - # Get the Playlist IDs of each channel - for channels in channels_lists: - channels_response = self._api.channels().list( - id=",".join(channels), - part="contentDetails,snippet", - fields="items(id,contentDetails(relatedPlaylists(uploads)),snippet(title))" - ).execute() - channels_to_check.extend(channels_response["items"]) - # For each playlist ID, get 50 videos - for channel in channels_to_check: - uploads_list_id = channel["contentDetails"]["relatedPlaylists"]["uploads"] - for upload in self._get_uploads_playlist(uploads_list_id, last_n_hours): - upload['channel_title'] = channel['snippet']['title'] - upload['channel_id'] = channel['id'] - yield upload - - def get_video_comments(self, url: str, search_terms: str = None) -> List: - """ Populates a list with comments (and their replies). 
- - Args: - url: - search_terms: - """ - - if not search_terms: - search_terms = self.channel_name - video_id = re.search(r"^.*(youtu\.be\/|vi?\/|u\/\w\/|embed\/|\?vi?=|\&vi?=)([^#\&\?]*).*", - url).group(2) - page_token = "" # "&pageToken={}".format(page_token) - comment_threads_response = self._api.commentThreads().list( - part="snippet", - maxResults=100, - videoId="{}{}".format(video_id, page_token), - searchTerms=search_terms - ).execute() - - comments = [] - for comment_thread in comment_threads_response['items']: - channel_name = comment_thread['snippet']['topLevelComment']['snippet']['authorDisplayName'] - if channel_name == self.channel_name: - current_comment = {"url": url, "video_id": video_id, - "comment_id": comment_thread['id'], - "like_count": - comment_thread['snippet']['topLevelComment']['snippet'][ - 'likeCount'], - "reply_count": comment_thread['snippet']['totalReplyCount']} - comments.append(current_comment) - - return comments - - def get_profile_pictures(self, channels: List = None) -> List[Tuple[str, str]]: - """ Gets the profile picture urls for a list of channel ids (or for the self channel). - - Args: - channels: - - Returns: - profile_pictures: [(channel_id, thumbnail_url), ..] 
- """ - - if channels is None: - profile_pictures_request = self._api.channels().list( - mine="true", - part="snippet", - fields='items(id,snippet(thumbnails(default)))' - ) + raise YoutubeManagerError("Channel not found!") + + def remove_channel(self, channel_id: str = None, username: str = None) -> None: + if channel_id: + self.db.remove_channel_by_id(channel_id) + logger.info(f"Channel `{channel_id}` successfully removed!") + elif username: + self.db.remove_channel_by_username(username) + logger.info(f"Channel `{username}` successfully removed!") else: - profile_pictures_request = self._api.channels().list( - id=",".join(channels), - part="snippet", - fields='items(id,snippet(thumbnails(default)))' - ) - - profile_pictures_response = profile_pictures_request.execute() - - profile_pictures_result = [] - for profile_picture in profile_pictures_response["items"]: - profile_pictures_result.append( - (profile_picture["id"], profile_picture["snippet"]["thumbnails"]["default"]["url"])) - - return profile_pictures_result - - @staticmethod - def _channel_from_response(response: Dict) -> Union[Dict, None]: - """ - Transforms a YouTube API response into a channel Dict. - - Args: - response: - """ - - for channel in response['items']: - result = dict() - result['id'] = channel['id'] - result['username'] = channel['snippet']['title'] - result['title'] = None - result['added_on'] = datetime.utcnow().isoformat() - result['last_commented'] = (datetime.utcnow() - timedelta(days=1)).isoformat() - return result - return None - - @staticmethod - def split_list(input_list: List, chunk_size: int) -> List: - """ - Split a list into `chunk_size` sub-lists. 
- - Args: - input_list: - chunk_size: - """ - - chunks = math.ceil(len(input_list) / chunk_size) - if chunks == 1: - output_list = [input_list] - else: - output_list = [] - end = 0 - for i in range(chunks - 1): - start = i * chunk_size - end = (i + 1) * chunk_size - output_list.append(input_list[start:end]) - output_list.append(input_list[end:]) - - return output_list - - def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> Dict: - """ Retrieves uploads using the specified playlist ID which were have been added - since the last check. - - Args: - uploads_list_id (str): The ID of the uploads playlist - """ - - # Construct the request - playlist_items_request = self._api.playlistItems().list( - playlistId=uploads_list_id, - part="snippet", - fields='items(id,snippet(title,publishedAt,resourceId(videoId)))', - maxResults=50 - ) - - while playlist_items_request: - playlist_items_response = playlist_items_request.execute() - for playlist_item in playlist_items_response["items"]: - published_at = dateutil.parser.parse(playlist_item['snippet']['publishedAt']) - video = dict() - # Return the video only if it was published in the last `last_n_hours` hours - if published_at >= (datetime.utcnow() - timedelta(hours=last_n_hours)).replace( - tzinfo=timezone.utc): - video['id'] = playlist_item["snippet"]["resourceId"]["videoId"] - video['published_at'] = playlist_item["snippet"]["publishedAt"] - video['title'] = playlist_item["snippet"]["title"] - yield video - else: - return - - playlist_items_request = self._api.playlistItems().list_next( - playlist_items_request, playlist_items_response - ) - - def _comment_threads_insert(self, properties: Dict, **kwargs: Any) -> Dict: - """ Comment using the Youtube API. 
- Args: - properties: - **kwargs: - """ - - resource = self._build_resource(properties) - kwargs = self._remove_empty_kwargs(**kwargs) - response = self._api.commentThreads().insert(body=resource, **kwargs).execute() - return response - - @staticmethod - def _build_resource(properties: Dict) -> Dict: - """ Build a resource based on a list of properties given as key-value pairs. - Leave properties with empty values out of the inserted resource. """ + raise YoutubeManagerError("You should either pass channel id or username " + "to remove channel!") - resource = {} - for p in properties: - # Given a key like "snippet.title", split into "snippet" and "title", where - # "snippet" will be an object and "title" will be a property in that object. - prop_array = p.split('.') - ref = resource - for pa in range(0, len(prop_array)): - is_array = False - key = prop_array[pa] - # For properties that have array values, convert a name like - # "snippet.tags[]" to snippet.tags, and set a flag to handle - # the value as an array. - if key[-2:] == '[]': - key = key[0:len(key) - 2:] - is_array = True - if pa == (len(prop_array) - 1): - # Leave properties without values out of inserted resource. - if properties[p]: - if is_array: - ref[key] = properties[p].split(',') - else: - ref[key] = properties[p] - elif key not in ref: - # For example, the property is "snippet.title", but the resource does - # not yet have a "snippet" object. Create the snippet object here. - # Setting "ref = ref[key]" means that in the next time through the - # "for pa in range ..." loop, we will be setting a property in the - # resource's "snippet" object. - ref[key] = {} - ref = ref[key] - else: - # For example, the property is "snippet.description", and the resource - # already has a "snippet" object. - ref = ref[key] - return resource - @staticmethod - def _remove_empty_kwargs(**kwargs: Any) -> Dict: - """ Remove keyword arguments that are not set. 
""" - good_kwargs = {} - if kwargs is not None: - for key, value in kwargs.items(): - if value: - good_kwargs[key] = value - return good_kwargs +class YoutubeManagerError(Exception): + def __init__(self, message): + super().__init__(message) From a8f48a49cdeaf85ae1cf4a47d65fe3dcc382cb67 Mon Sep 17 00:00:00 2001 From: drkostas Date: Thu, 1 Jul 2021 22:45:06 -0400 Subject: [PATCH 13/33] Recreated the list_channels and list_comments functions with pretty print #4 --- requirements.txt | 1 + youbot/datastore/mysql_datastore.py | 51 ++++++++++++++++++---- youbot/main.py | 11 ++++- youbot/youtube_utils/youtube_manager.py | 56 ++++++++++++++++++++++++- 4 files changed, 108 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 826dc79..866ab8b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +arrow~=1.1.1 dropbox~=11.10.0 gmail~=0.6.3 google-api-python-client~=2.7.0 diff --git a/youbot/datastore/mysql_datastore.py b/youbot/datastore/mysql_datastore.py index 4cc6cb2..be6303f 100644 --- a/youbot/datastore/mysql_datastore.py +++ b/youbot/datastore/mysql_datastore.py @@ -330,12 +330,12 @@ def create_tables_if_not_exist(self): self.create_table(table=self.CHANNEL_TABLE, schema=channels_schema) self.create_table(table=self.COMMENTS_TABLE, schema=comments_schema) - def get_channels(self) -> List[Tuple]: + def get_channels(self) -> List[Dict]: """ Retrieve all channels from the database. 
""" - result = self.select_from_table(table=self.CHANNEL_TABLE) - - return result + result = self.select_from_table(table=self.CHANNEL_TABLE, order_by='priority') + for row in result: + yield self._table_row_to_channel_dict(row) def add_channel(self, channel_data: Dict) -> None: """ Insert the provided channel into the database""" @@ -435,8 +435,8 @@ def add_comment(self, ch_id: str, video_link: str, comment_text: str) -> None: except mysql.connector.errors.IntegrityError as e: logger.error(f"MySQL Error: {e}") - def get_comments(self, n_recent: int, min_likes: int = -1, - min_replies: int = -1) -> List[Tuple]: + def get_comments(self, n_recent: int = 50, min_likes: int = -1, + min_replies: int = -1) -> List[Dict]: """ Get the latest n_recent comments from the comments table. Args: @@ -452,14 +452,13 @@ def get_comments(self, n_recent: int, min_likes: int = -1, right_table=self.CHANNEL_TABLE, left_columns=comment_cols, right_columns=channel_cols, - custom_columns='COUNT(comment) as cnt', join_key_left='channel_id', join_key_right='channel_id', where=where, order_by='l.comment_time', asc_or_desc='desc', limit=n_recent): - yield comment + yield self._table_row_to_comment_dict(comment) def update_comment(self, video_link: str, comment_id: str, like_cnt: int, reply_cnt: int) -> None: @@ -487,3 +486,39 @@ def update_comment(self, video_link: str, comment_id: str, set_data=set_data, where=f"video_link='{video_link}'") + @staticmethod + def _table_row_to_channel_dict(row: Tuple) -> Dict: + """Transform a table row into a channel representation + + Args: + row (list): The database row + """ + + channel = dict() + channel['channel_id'] = row[0] + channel['username'] = row[1] + channel['added_on'] = row[2] + channel['last_commented'] = row[3] + channel['priority'] = row[4] + channel['channel_photo'] = row[5] + return channel + + @staticmethod + def _table_row_to_comment_dict(row: Tuple) -> Dict: + """Transform a table row into a channel representation + + Args: + row (list): 
The database row + """ + + channel = dict() + channel['video_link'] = row[0] + channel['comment'] = row[1] + channel['comment_time'] = row[2] + channel['like_count'] = row[3] + channel['reply_count'] = row[4] + channel['comment_link'] = row[5] + channel['username'] = row[6] + channel['channel_photo'] = row[7] + return channel + diff --git a/youbot/main.py b/youbot/main.py index 902567c..49f8df3 100644 --- a/youbot/main.py +++ b/youbot/main.py @@ -36,6 +36,12 @@ def get_args() -> argparse.Namespace: optional_args.add_argument('-i', '--id', help="The ID of the YouTube Channel") optional_args.add_argument('-u', '--username', help="The Username of the YouTube Channel") + optional_args.add_argument('--n-recent', default=50, + help="Number of recent comments to get for `list_comments`") + optional_args.add_argument('--min_likes', default=-1, + help="Number of minimum liked for `list_comments`") + optional_args.add_argument('--min_replies', default=-1, + help="Number of minimum replies for `list_comments`") optional_args.add_argument('-d', '--debug', action='store_true', help='Enables the debug log messages') optional_args.add_argument("-h", "--help", action="help", help="Show this help message and exit") @@ -66,11 +72,12 @@ def remove_channel(youtube: YoutubeManager, args: argparse.Namespace) -> None: def list_channels(youtube: YoutubeManager, args: argparse.Namespace) -> None: - raise NotImplementedError() + youtube.list_channels() def list_comments(youtube: YoutubeManager, args: argparse.Namespace) -> None: - raise NotImplementedError() + youtube.list_comments(n_recent=args.n_recent, min_likes=args.min_likes, + min_replies=args.min_replies) def refresh_photos(youtube: YoutubeManager, args: argparse.Namespace) -> None: diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 0234527..64d5422 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -1,5 +1,5 @@ from typing import 
List, Tuple, Dict, Union, Any -from datetime import datetime, timedelta, timezone +import arrow from youbot import ColorizedLogger, YoutubeMySqlDatastore from .youtube_api import YoutubeApiV3 @@ -39,6 +39,60 @@ def remove_channel(self, channel_id: str = None, username: str = None) -> None: raise YoutubeManagerError("You should either pass channel id or username " "to remove channel!") + def list_channels(self) -> None: + channels = [(row["channel_id"], row["username"].title(), + arrow.get(row["added_on"]).humanize(), + arrow.get(row["last_commented"]).humanize()) + for row in self.db.get_channels()] + + headers = ['Channel Id', 'Channel Name', 'Added On', 'Last Commented'] + self.pretty_print(headers, channels) + + def list_comments(self, n_recent: int = 50, min_likes: int = -1, + min_replies: int = -1) -> None: + + comments = [(row["username"].title(), row["comment"], + arrow.get(row["comment_time"]).humanize(), + row["like_count"], row["reply_count"], row["comment_link"]) + for row in self.db.get_comments(n_recent, min_likes, min_replies)] + + headers = ['Channel', 'Comment', 'Time', 'Likes', 'Replies', 'Comment URL'] + self.pretty_print(headers, comments) + + @staticmethod + def pretty_print(headers: List[str], data: List[Tuple]): + """Print the provided header and data in a visually pleasing manner + + Args: + headers: The headers to print + data: The data rows + """ + + print_str = "\n" + if len(data) == 0: + return + + separators = [] + for word in headers: + separators.append('-' * len(word)) + + output = [headers, separators] + data + + col_widths = [0] * len(headers) + for row in output: + for idx, column in enumerate(row): + if len(str(column)) > 100: + row[idx] = row[idx][:94] + " (...)" + if len(str(row[idx])) > col_widths[idx]: + col_widths[idx] = len(row[idx]) + + for row in output: + for idx, column in enumerate(row): + column = str(column) + print_str += "".join(column.ljust(col_widths[idx])) + " " + print_str += '\n' + logger.info(print_str) + 
class YoutubeManagerError(Exception): def __init__(self, message): From 25ade68243f53f5e2d9cbf2cc31255be51408d54 Mon Sep 17 00:00:00 2001 From: drkostas Date: Thu, 26 May 2022 18:56:43 -0400 Subject: [PATCH 14/33] Major refactoring! --- Makefile | 73 +-- TODO.md | 5 +- comments/sample_comments.txt | 4 + confs/commenter.yml | 6 + requirements.txt | 17 +- setup.py | 9 +- tests/test_configuration.py | 158 ------ .../actual_output_to_yaml.yml | 7 - .../minimal_conf_correct.yml | 7 - .../test_configuration/minimal_conf_wrong.yml | 7 - .../minimal_yml_schema.json | 44 -- .../simplest_yml_schema.json | 44 -- .../test_configuration/template_conf.yml | 13 - youbot/__init__.py | 11 +- youbot/cloudstore/__init__.py | 7 - youbot/cloudstore/abstract_cloudstore.py | 72 --- youbot/cloudstore/dropbox_cloudstore.py | 106 ---- youbot/configuration/__init__.py | 7 - youbot/configuration/configuration.py | 177 ------ youbot/configuration/yml_schema.json | 17 - youbot/configuration/yml_schema_strict.json | 66 --- youbot/datastore/__init__.py | 7 - youbot/datastore/abstract_datastore.py | 59 -- youbot/datastore/mysql_datastore.py | 524 ------------------ youbot/emailer/__init__.py | 7 - youbot/emailer/abstract_emailer.py | 39 -- youbot/emailer/gmail_emailer.py | 87 --- youbot/fancy_logger/__init__.py | 8 - youbot/fancy_logger/abstract_fancy_logger.py | 19 - youbot/fancy_logger/colorized_logger.py | 151 ----- youbot/main.py | 6 +- youbot/youtube_utils/__init__.py | 7 - youbot/youtube_utils/youtube_api.py | 372 ------------- youbot/youtube_utils/youtube_manager.py | 99 ---- youbot/yt_mysql.py | 235 ++++++++ 35 files changed, 306 insertions(+), 2171 deletions(-) create mode 100644 comments/sample_comments.txt delete mode 100644 tests/test_configuration.py delete mode 100644 tests/test_data/test_configuration/actual_output_to_yaml.yml delete mode 100644 tests/test_data/test_configuration/minimal_conf_correct.yml delete mode 100644 tests/test_data/test_configuration/minimal_conf_wrong.yml 
delete mode 100644 tests/test_data/test_configuration/minimal_yml_schema.json delete mode 100644 tests/test_data/test_configuration/simplest_yml_schema.json delete mode 100644 tests/test_data/test_configuration/template_conf.yml delete mode 100644 youbot/cloudstore/__init__.py delete mode 100644 youbot/cloudstore/abstract_cloudstore.py delete mode 100644 youbot/cloudstore/dropbox_cloudstore.py delete mode 100644 youbot/configuration/__init__.py delete mode 100644 youbot/configuration/configuration.py delete mode 100644 youbot/configuration/yml_schema.json delete mode 100644 youbot/configuration/yml_schema_strict.json delete mode 100644 youbot/datastore/__init__.py delete mode 100644 youbot/datastore/abstract_datastore.py delete mode 100644 youbot/datastore/mysql_datastore.py delete mode 100644 youbot/emailer/__init__.py delete mode 100644 youbot/emailer/abstract_emailer.py delete mode 100644 youbot/emailer/gmail_emailer.py delete mode 100644 youbot/fancy_logger/__init__.py delete mode 100644 youbot/fancy_logger/abstract_fancy_logger.py delete mode 100644 youbot/fancy_logger/colorized_logger.py delete mode 100644 youbot/youtube_utils/__init__.py delete mode 100644 youbot/youtube_utils/youtube_api.py delete mode 100644 youbot/youtube_utils/youtube_manager.py create mode 100644 youbot/yt_mysql.py diff --git a/Makefile b/Makefile index d6c6be7..75d6871 100644 --- a/Makefile +++ b/Makefile @@ -1,41 +1,34 @@ # Makefile for the youbot module - +.ONESHELL: SHELL=/bin/bash PYTHON_VERSION=3.8 +ENV_NAME="youbot" -# You can use either venv (virtualenv) or conda env by specifying the correct argument (server=) -ifeq ($(server),prod) - # Use Conda - BASE=~/anaconda3/envs/youbot - BIN=$(BASE)/bin - CLEAN_COMMAND="conda env remove -p $(BASE)" - CREATE_COMMAND="conda create --prefix $(BASE) python=$(PYTHON_VERSION) -y" - SETUP_FLAG= - DEBUG=False -else ifeq ($(server),circleci) +# You can use either venv (venv) or conda env +# by specifying the correct argument (env=) +ifeq 
($(env),venv) # Use Venv BASE=venv BIN=$(BASE)/bin - CLEAN_COMMAND="rm -rf $(BASE)" CREATE_COMMAND="python$(PYTHON_VERSION) -m venv $(BASE)" - SETUP_FLAG= - DEBUG=True -else ifeq ($(server),local) - # Use Conda - BASE=~/anaconda3/envs/youbot - BIN=$(BASE)/bin - CLEAN_COMMAND="conda env remove -p $(BASE)" - CREATE_COMMAND="conda create --prefix $(BASE) python=$(PYTHON_VERSION) -y" -# SETUP_FLAG='--local' # If you want to use this, you change it in setup.py too - DEBUG=True + DELETE_COMMAND="rm -rf $(BASE)" + ACTIVATE_COMMAND="source venv/bin/activate" + DEACTIVATE_COMMAND="deactivate" else # Use Conda - BASE=~/anaconda3/envs/youbot + BASE=~/anaconda3/envs/$(ENV_NAME) BIN=$(BASE)/bin - CLEAN_COMMAND="conda env remove -p $(BASE)" CREATE_COMMAND="conda create --prefix $(BASE) python=$(PYTHON_VERSION) -y" -# SETUP_FLAG='--local' # If you want to use this, you change it in setup.py too - DEBUG=True + DELETE_COMMAND="conda env remove -p $(BASE)" + ACTIVATE_COMMAND="conda activate -p $(BASE)" + DEACTIVATE_COMMAND="conda deactivate" +endif + +# To load a env file use env_file= +# e.g. 
make release env_file=.env +ifneq ($(env_file),) + include $(env_file) +# export endif all: @@ -50,26 +43,33 @@ help: @echo @echo "make help" @echo " Display this message" - @echo "make install [server=]" + @echo "make install [env=] [env_file=]" @echo " Call clean delete_conda_env create_conda_env setup run_tests" - @echo "make clean [server=]" + @echo "make clean [env=] [env_file=]" @echo " Delete all './build ./dist ./*.pyc ./*.tgz ./*.egg-info' files" - @echo "make delete_env [server=]" + @echo "make delete_env [env=] [env_file=]" @echo " Delete the current conda env or virtualenv" - @echo "make create_env [server=]" + @echo "make create_env [env=] [env_file=]" @echo " Create a new conda env or virtualenv for the specified python version" - @echo "make setup [server=]" + @echo "make requirements [env=] [env_file=]" + @echo " Install the requirements from the requirements.txt" + @echo "make setup [env=] [env_file=]" @echo " Call setup.py install" - @echo "make run_tests [server=]" + @echo "make run_tests [env=] [env_file=]" @echo " Run all the tests from the specified folder" @echo "-----------------------------------------------------------------------------------------------------------" install: $(MAKE) clean $(MAKE) delete_env $(MAKE) create_env + $(MAKE) requirements $(MAKE) setup $(MAKE) run_tests + @echo -e "\033[0;31m############################################" + @echo @echo "Installation Successful!" + @echo "To activate the conda environment run:" + @echo ' conda activate youbot' clean: $(PYTHON_BIN)python setup.py clean delete_env: @@ -78,10 +78,13 @@ delete_env: create_env: @echo "Creating virtual environment.." 
eval $(CREATE_COMMAND) -run_tests: - $(BIN)/python setup.py test $(SETUP_FLAG) +requirements: + pip install -r requirements.txt setup: + $(BIN)/pip install setuptools $(BIN)/python setup.py install $(SETUP_FLAG) +run_tests: + $(BIN)/python setup.py test $(SETUP_FLAG) -.PHONY: help install clean delete_env create_env setup run_tests \ No newline at end of file +.PHONY: help install clean delete_env create_env requirements setup run_tests \ No newline at end of file diff --git a/TODO.md b/TODO.md index da8a276..c3b16a9 100644 --- a/TODO.md +++ b/TODO.md @@ -5,10 +5,13 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Build YouTube Manager class - [X] Create child MySQL class - [X] Integrate YoutubeMysql class into the YoutubeManager class +- [ ] Use the pypi packages I have created instead of the local ones - [ ] Create the workflow for the commenter -- [ ] Roll the comments for each channel +- [ ] Roll the comments for each channel - store comments in sql table? +- [ ] Create table with errors - [ ] Create the workflow for the accumulator - [ ] Add SQL script for creating the tables needed +- [ ] Send me email on fatal error - [ ] Recreate the Livestreaming module - [ ] Improve the YouTube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) - [ ] Use multiple account (different api keys) to check for new comments diff --git a/comments/sample_comments.txt b/comments/sample_comments.txt new file mode 100644 index 0000000..9b60dcd --- /dev/null +++ b/comments/sample_comments.txt @@ -0,0 +1,4 @@ +Hey! I am a bot. +Hello there! +Nice video! +Woo nice! 
\ No newline at end of file diff --git a/confs/commenter.yml b/confs/commenter.yml index 27aff94..256bcb1 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -23,3 +23,9 @@ youtube: api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl type: normal + sleep_time: 60 +comment: + - config: + comments_list: + path: ../comments/sample_comments.txt + type: local # local, dropbox, or mysql \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 866ab8b..762619b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,15 @@ arrow~=1.1.1 -dropbox~=11.10.0 -gmail~=0.6.3 +bs4 google-api-python-client~=2.7.0 google-auth-oauthlib~=0.4.4 -jsonschema~=3.2.0 httplib2~=0.19.1 -mysql-connector-python~=8.0.19 -mysql-connector~=2.2.9 oauth2client~=4.1.3 python-dateutil~=2.8.1 -PyYAML~=5.4.1 requests~=2.25.1 -setuptools~=52.0.0 -termcolor~=1.1.0 -typer~=0.3.2 \ No newline at end of file +setuptools>=52.0.0 +tqdm +yaml-config-wrapper==1.0.4 +termcolor-logger==1.0.3 +high_sql==1.0.2 +pyemail_sender==1.0.1 +cloud_filemanager==1.0.1 \ No newline at end of file diff --git a/setup.py b/setup.py index 88541b6..fb9643d 100644 --- a/setup.py +++ b/setup.py @@ -34,15 +34,13 @@ def run(self): with open('README.md') as readme_file: readme = readme_file.read() -setup_requirements = [] -test_requirements = [] COMMANDS = [ - 'cli = youbot.cli:app', 'youbot_main = youbot.main:main' ] -data_files = ['youbot/configuration/yml_schema.json'] +data_files = [] +# data_files = ['youbot/configuration/yml_schema.json'] setup( author="drkostas", @@ -63,7 +61,6 @@ def run(self): data_files=[('', data_files)], description="A bot that takes a list of youtube channels and posts the first comment in every new video.", entry_points={'console_scripts': COMMANDS}, - install_requires=requirements, license="MIT license", long_description=readme, include_package_data=True, @@ -73,9 +70,7 @@ def run(self): packages=find_packages(include=['youbot', 
'youbot.*']), # py_modules=['main'], - setup_requires=setup_requirements, test_suite='tests', - tests_require=test_requirements, url='https://github.com/drkostas/Youtube-FirstCommentBot', version='2.0', zip_safe=False, diff --git a/tests/test_configuration.py b/tests/test_configuration.py deleted file mode 100644 index 7c6c1f4..0000000 --- a/tests/test_configuration.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python - -"""Tests for `configuration` sub-package.""" -# pylint: disable=redefined-outer-name - -import unittest -from jsonschema.exceptions import ValidationError -from typing import Dict -import logging -import os - -from youbot import Configuration, validate_json_schema - -logger = logging.getLogger('TestConfiguration') - - -class TestConfiguration(unittest.TestCase): - - def test_validation_library(self): - """ Sanity Check unittest""" - configuration_schema = Configuration.load_configuration_schema( - os.path.join(self.test_data_path, 'simplest_yml_schema.json')) - wrong_confs = [ - {"subproperty1": [123, 234], - "subproperty2": 1}, # p1 is string - - {"subproperty1": "10", - "subproperty2": 3}, # p2 is either 1 or 2 - - {"subproperty2": 1}, # p1 is required - - {"subproperty1": "10", - "subproperty2": 1, - "subproperty3": {}}, # p4 is required in p3 - - {"subproperty1": "10", - "subproperty2": 1, - "subproperty3": {"subproperty4": 15}} # p4 is either 1 or 2 - ] - for wrong_conf in wrong_confs: - with self.assertRaises(ValidationError): - # try: - validate_json_schema(wrong_conf, configuration_schema) - # except Exception as e: - # print(e) - logger.info('YMLs failed to validate successfully.') - - def test_schema_validation(self): - try: - logger.info('Loading the correct Configuration..') - Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_correct.yml'), - config_schema_path=os.path.join(self.test_data_path, - 'minimal_yml_schema.json')) - except ValidationError as e: - logger.error('Error validating the correct yml: %s', 
e) - self.fail('Error validating the correct yml') - except Exception as e: - raise e - else: - logger.info('First yml validated successfully.') - - with self.assertRaises(ValidationError): - logger.info('Loading the wrong Configuration..') - Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_wrong.yml'), - config_schema_path=os.path.join(self.test_data_path, - 'minimal_yml_schema.json')) - logger.info('Second yml failed to validate successfully.') - - def test_to_json(self): - logger.info('Loading Configuration..') - configuration = Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_correct.yml'), - config_schema_path=os.path.join(self.test_data_path, - 'minimal_yml_schema.json')) - - expected_json = {'datastore': 'test', - 'cloudstore': [{ - 'subproperty1': 1, - 'subproperty2': [123, 234] - }], - 'tag': 'test_tag'} - # Compare - logger.info('Comparing the results..') - self.assertDictEqual(self._sort_dict(expected_json), self._sort_dict(configuration.to_json())) - - def test_to_yaml(self): - logger.info('Loading Configuration..') - configuration = Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_correct.yml'), - config_schema_path=os.path.join(self.test_data_path, - 'minimal_yml_schema.json')) - # Modify and export yml - logger.info('Changed the host and the api_key..') - configuration.config['cloudstore'][0]['subproperty1'] = 999 - configuration.tag = 'CHANGED VALUE' - logger.info('Exporting to yaml..') - configuration.to_yaml(os.path.join(self.test_data_path, - 'actual_output_to_yaml.yml')) - # Load the modified yml - logger.info('Loading the exported yaml..') - modified_configuration = Configuration( - config_src=os.path.join(self.test_data_path, 'actual_output_to_yaml.yml')) - # Compare - logger.info('Comparing the results..') - expected_json = {'datastore': 'test', - 'cloudstore': [{ - 'subproperty1': 999, - 'subproperty2': [123, 234] - }], - 'tag': 'CHANGED VALUE'} - 
self.assertDictEqual(self._sort_dict(expected_json), self._sort_dict(modified_configuration.to_json())) - - def test_get_config(self): - logger.info('Loading Configuration..') - configuration = Configuration(config_src=os.path.join(self.test_data_path, 'minimal_conf_correct.yml'), - config_schema_path=os.path.join(self.test_data_path, - 'minimal_yml_schema.json')) - cloudstore_config = configuration.get_config(config_name='cloudstore') - expected_json = [{ - 'subproperty1': 1, - 'subproperty2': [123, 234] - }] - # Compare - logger.info('Comparing the results..') - self.assertListEqual(expected_json, cloudstore_config) - - @classmethod - def _sort_dict(cls, dictionary: Dict) -> Dict: - return {k: cls._sort_dict(v) if isinstance(v, dict) else v - for k, v in sorted(dictionary.items())} - - @staticmethod - def _setup_log() -> None: - # noinspection PyArgumentList - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - handlers=[logging.StreamHandler() - ] - ) - - def setUp(self) -> None: - pass - - def tearDown(self) -> None: - pass - - @classmethod - def setUpClass(cls): - cls._setup_log() - cls.tests_abs_path = os.path.abspath(os.path.dirname(__file__)) - cls.test_data_path: str = os.path.join(cls.tests_abs_path, 'test_data', 'test_configuration') - - @classmethod - def tearDownClass(cls): - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_data/test_configuration/actual_output_to_yaml.yml b/tests/test_data/test_configuration/actual_output_to_yaml.yml deleted file mode 100644 index 32212e6..0000000 --- a/tests/test_data/test_configuration/actual_output_to_yaml.yml +++ /dev/null @@ -1,7 +0,0 @@ -cloudstore: -- subproperty1: 999 - subproperty2: - - 123 - - 234 -datastore: test -tag: CHANGED VALUE diff --git a/tests/test_data/test_configuration/minimal_conf_correct.yml b/tests/test_data/test_configuration/minimal_conf_correct.yml deleted file mode 100644 
index 125c031..0000000 --- a/tests/test_data/test_configuration/minimal_conf_correct.yml +++ /dev/null @@ -1,7 +0,0 @@ -datastore: test -cloudstore: - - subproperty1: 1 - subproperty2: - - 123 - - 234 -tag: test_tag \ No newline at end of file diff --git a/tests/test_data/test_configuration/minimal_conf_wrong.yml b/tests/test_data/test_configuration/minimal_conf_wrong.yml deleted file mode 100644 index 194b5ab..0000000 --- a/tests/test_data/test_configuration/minimal_conf_wrong.yml +++ /dev/null @@ -1,7 +0,0 @@ -datastore: test -cloudstore: - - subproperty1: 10 - subproperty2: - - 123 - - 234 -tag: test_tag \ No newline at end of file diff --git a/tests/test_data/test_configuration/minimal_yml_schema.json b/tests/test_data/test_configuration/minimal_yml_schema.json deleted file mode 100644 index b3bfb0d..0000000 --- a/tests/test_data/test_configuration/minimal_yml_schema.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "type": "object", - "properties": { - "datastore": { - "type": "string" - }, - "tag": { - "type": "string" - }, - "cloudstore": { - "$ref": "#/definitions/cloudstore" - } - }, - "required": [ - "tag" - ], - "definitions": { - "cloudstore": { - "type": "array", - "items": { - "type": "object", - "required": [ - "subproperty1", - "subproperty2" - ], - "properties": { - "subproperty1": { - "type": "number", - "enum": [ - 1, - 2 - ] - }, - "subproperty2": { - "type": "array" - } - } - }, - "additionalProperties": false - } - }, - "additionalProperties": false -} \ No newline at end of file diff --git a/tests/test_data/test_configuration/simplest_yml_schema.json b/tests/test_data/test_configuration/simplest_yml_schema.json deleted file mode 100644 index d54bbbd..0000000 --- a/tests/test_data/test_configuration/simplest_yml_schema.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "type": "object", - "properties": { - "subproperty1": { - "type": "string" - }, - 
"subproperty2": { - "type": "number", - "enum": [ - 1, - 2 - ] - }, - "subproperty3": { - "$ref": "#/definitions/subproperty3" - } - }, - "required": [ - "subproperty1" - ], - "definitions": { - "subproperty3": { - "type": "object", - "items": { - "type": "object" - }, - "additionalProperties": false, - "required": [ - "subproperty4" - ], - "properties": { - "subproperty4": { - "type": "number", - "enum": [ - 1, - 2 - ] - } - } - } - }, - "additionalProperties": false -} \ No newline at end of file diff --git a/tests/test_data/test_configuration/template_conf.yml b/tests/test_data/test_configuration/template_conf.yml deleted file mode 100644 index 27ef9a9..0000000 --- a/tests/test_data/test_configuration/template_conf.yml +++ /dev/null @@ -1,13 +0,0 @@ -tag: production -cloudstore: - - config: - api_key: apiqwerty - type: dropbox -datastore: - - config: - hostname: host123 - username: user1 - password: pass2 - db_name: db3 - port: 3306 - type: mysql \ No newline at end of file diff --git a/youbot/__init__.py b/youbot/__init__.py index e536fea..ad2adba 100644 --- a/youbot/__init__.py +++ b/youbot/__init__.py @@ -1,10 +1,11 @@ """Top-level package for YoutubeCommentBot.""" -from youbot.fancy_logger import ColorizedLogger -from youbot.configuration import Configuration, validate_json_schema -from youbot.cloudstore import DropboxCloudstore -from youbot.datastore import YoutubeMySqlDatastore -from youbot.emailer import GmailEmailer +from termcolor_logger import ColorLogger +from yaml_config_wrapper import Configuration, validate_json_schema +from cloud_filemanager import DropboxCloudManager +from high_sql import HighMySQL +from pyemail_sender import GmailPyEmailSender +from .yt_mysql import YoutubeMySqlDatastore from youbot.youtube_utils import YoutubeManager, YoutubeApiV3 __author__ = "drkostas" diff --git a/youbot/cloudstore/__init__.py b/youbot/cloudstore/__init__.py deleted file mode 100644 index 008a435..0000000 --- a/youbot/cloudstore/__init__.py +++ /dev/null @@ 
-1,7 +0,0 @@ -"""Cloudstore sub-package of YoutubeCommentBot.""" - -from .dropbox_cloudstore import DropboxCloudstore - -__author__ = "drkostas" -__email__ = "georgiou.kostas94@gmail.com" -__version__ = "2.0" diff --git a/youbot/cloudstore/abstract_cloudstore.py b/youbot/cloudstore/abstract_cloudstore.py deleted file mode 100644 index d626230..0000000 --- a/youbot/cloudstore/abstract_cloudstore.py +++ /dev/null @@ -1,72 +0,0 @@ -from abc import ABC, abstractmethod - - -class AbstractCloudstore(ABC): - __slots__ = ('_handler',) - - @abstractmethod - def __init__(self, *args, **kwargs) -> None: - """ - Tha basic constructor. Creates a new instance of Cloudstore using the specified credentials - """ - - pass - - @staticmethod - @abstractmethod - def get_handler(*args, **kwargs): - """ - Returns a Cloudstore handler. - - :param args: - :param kwargs: - :return: - """ - - pass - - @abstractmethod - def upload_file(self, *args, **kwargs): - """ - Uploads a file to the Cloudstore - - :param args: - :param kwargs: - :return: - """ - - pass - - @abstractmethod - def download_file(self, *args, **kwargs): - """ - Downloads a file from the Cloudstore - - :param args: - :param kwargs: - :return: - """ - - pass - - @abstractmethod - def delete_file(self, *args, **kwargs): - """ - Deletes a file from the Cloudstore - - :param args: - :param kwargs: - :return: - """ - - pass - - @abstractmethod - def ls(self, *args, **kwargs): - """ - List the files and folders in the Cloudstore - :param args: - :param kwargs: - :return: - """ - pass diff --git a/youbot/cloudstore/dropbox_cloudstore.py b/youbot/cloudstore/dropbox_cloudstore.py deleted file mode 100644 index 5646208..0000000 --- a/youbot/cloudstore/dropbox_cloudstore.py +++ /dev/null @@ -1,106 +0,0 @@ -from typing import Dict, Union -from dropbox import Dropbox, files, exceptions - -from .abstract_cloudstore import AbstractCloudstore -from youbot import ColorizedLogger - -logger = ColorizedLogger('DropboxCloudstore') - - -class 
DropboxCloudstore(AbstractCloudstore): - __slots__ = '_handler' - - _handler: Dropbox - - def __init__(self, config: Dict) -> None: - """ - The basic constructor. Creates a new instance of Cloudstore using the specified credentials - - :param config: - """ - - self._handler = self.get_handler(api_key=config['api_key']) - super().__init__() - - @staticmethod - def get_handler(api_key: str) -> Dropbox: - """ - Returns a Cloudstore handler. - - :param api_key: - :return: - """ - - dbx = Dropbox(api_key) - return dbx - - def upload_file(self, file_bytes: bytes, upload_path: str, write_mode: str = 'overwrite') -> None: - """ - Uploads a file to the Cloudstore - - :param file_bytes: - :param upload_path: - :param write_mode: - :return: - """ - - # TODO: Add option to support FileStream, StringIO and FilePath - try: - logger.debug("Uploading file to path: %s" % upload_path) - self._handler.files_upload(f=file_bytes, path=upload_path, - mode=files.WriteMode(write_mode)) - except exceptions.ApiError as err: - logger.error('API error: %s' % err) - - def download_file(self, frompath: str, tofile: str = None) -> Union[bytes, None]: - """ - Downloads a file from the Cloudstore - - :param frompath: - :param tofile: - :return: - """ - - try: - if tofile is not None: - logger.debug("Downloading file from path: %s to path %s" % (frompath, tofile)) - self._handler.files_download_to_file(download_path=tofile, path=frompath) - else: - logger.debug("Downloading file from path: %s to variable" % frompath) - md, res = self._handler.files_download(path=frompath) - data = res.content # The bytes of the file - return data - except exceptions.HttpError as err: - logger.error('HTTP error %s' % err) - return None - - def delete_file(self, file_path: str) -> None: - """ - Deletes a file from the Cloudstore - - :param file_path: - :return: - """ - - try: - logger.debug("Deleting file from path: %s" % file_path) - self._handler.files_delete_v2(path=file_path) - except exceptions.ApiError as err: 
- logger.error('API error %s' % err) - - def ls(self, path: str = '') -> Dict: - """ - List the files and folders in the Cloudstore - - :param path: - :return: - """ - try: - files_list = self._handler.files_list_folder(path=path) - files_dict = {} - for entry in files_list.entries: - files_dict[entry.name] = entry - return files_dict - except exceptions.ApiError as err: - logger.error('Folder listing failed for %s -- assumed empty: %s' % (path, err)) - return {} diff --git a/youbot/configuration/__init__.py b/youbot/configuration/__init__.py deleted file mode 100644 index 910fcb1..0000000 --- a/youbot/configuration/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Configuration sub-package of YoutubeCommentBot.""" - -from .configuration import Configuration, validate_json_schema - -__author__ = "drkostas" -__email__ = "georgiou.kostas94@gmail.com" -__version__ = "2.0" diff --git a/youbot/configuration/configuration.py b/youbot/configuration/configuration.py deleted file mode 100644 index b570001..0000000 --- a/youbot/configuration/configuration.py +++ /dev/null @@ -1,177 +0,0 @@ -import os -from typing import Dict, List, Tuple, Union -import json -import _io -from io import StringIO, TextIOWrapper -import re -import yaml -from jsonschema import validate as validate_json_schema - -from youbot import ColorizedLogger - -logger = ColorizedLogger('Config', 'white') - - -class Configuration: - __slots__ = ('config', 'config_path', 'config_keys', 'tag') - - config: Dict - config_path: str - tag: str - config_keys: List - env_variable_tag: str = '!ENV' - env_variable_pattern: str = r'.*?\${(\w+)}.*?' # ${var} - - def __init__(self, config_src: Union[TextIOWrapper, StringIO, str], - config_schema_path: str = 'yml_schema.json'): - """ - The basic constructor. Creates a new instance of the Configuration class. 
- - Args: - config_src: The path, file or StringIO object of the configuration to load - config_schema_path: The path, file or StringIO object of the configuration validation file - """ - - # Load the predefined schema of the configuration - configuration_schema = self.load_configuration_schema(config_schema_path=config_schema_path) - # Load the configuration - self.config, self.config_path = self.load_yml(config_src=config_src, - env_tag=self.env_variable_tag, - env_pattern=self.env_variable_pattern) - # Validate the config - validate_json_schema(self.config, configuration_schema) - logger.debug("Schema Validation was Successful.") - # Set the config properties as instance attributes - self.tag = self.config['tag'] - self.config_keys = [key for key in self.config.keys() if key != 'tag'] - logger.info(f"Configuration file loaded successfully from path: {self.config_path}") - logger.info(f"Configuration Tag: {self.tag}") - - @staticmethod - def load_configuration_schema(config_schema_path: str) -> Dict: - """ - Loads the configuration schema file - - Args: - config_schema_path: The path of the config schema - - Returns: - configuration_schema: The loaded config schema - """ - - if config_schema_path[0] != os.sep: - config_schema_path = '/'.join( - [os.path.dirname(os.path.realpath(__file__)), config_schema_path]) - with open(config_schema_path) as f: - configuration_schema = json.load(f) - return configuration_schema - - @staticmethod - def load_yml(config_src: Union[TextIOWrapper, StringIO, str], env_tag: str, env_pattern: str) -> \ - Tuple[Dict, str]: - """ - Loads the configuration file - Args: - config_src: The path of the configuration - env_tag: The tag that distinguishes the env variables - env_pattern: The regex for finding the env variables - - Returns: - config, config_path - """ - pattern = re.compile(env_pattern) - loader = yaml.SafeLoader - loader.add_implicit_resolver(env_tag, pattern, None) - - def constructor_env_variables(loader, node): - """ - 
Extracts the environment variable from the node's value - :param yaml.Loader loader: the yaml loader - :param node: the current node in the yaml - :return: the parsed string that contains the value of the environment - variable - """ - value = loader.construct_scalar(node) - match = pattern.findall(value) # to find all env variables in line - if match: - full_value = value - for g in match: - full_value = full_value.replace( - f'${{{g}}}', os.environ.get(g, g) - ) - return full_value - return value - - loader.add_constructor(env_tag, constructor_env_variables) - - if isinstance(config_src, TextIOWrapper): - logger.debug("Loading yaml from TextIOWrapper") - config = yaml.load(config_src, Loader=loader) - config_path = os.path.abspath(config_src.name) - elif isinstance(config_src, StringIO): - logger.debug("Loading yaml from StringIO") - config = yaml.load(config_src, Loader=loader) - config_path = "StringIO" - elif isinstance(config_src, str): - config_path = os.path.abspath(config_src) - logger.debug("Loading yaml from path") - with open(config_path) as f: - config = yaml.load(f, Loader=loader) - else: - raise TypeError('Config file must be TextIOWrapper or path to a file') - return config, config_path - - def get_config(self, config_name) -> List: - """ - Returns the subconfig requested - - Args: - config_name: The name of the subconfig - - Returns: - sub_config: The sub_configs List - """ - - if config_name in self.config.keys(): - return self.config[config_name] - else: - raise ConfigurationError('Config property %s not set!' % config_name) - - def to_yml(self, fn: Union[str, _io.TextIOWrapper]) -> None: - """ - Writes the configuration to a stream. For example a file. 
- - Args: - fn: - - Returns: - """ - - self.config['tag'] = self.tag - if isinstance(fn, str): - with open(fn, 'w') as f: - yaml.dump(self.config, f, default_flow_style=False) - elif isinstance(fn, _io.TextIOWrapper): - yaml.dump(self.config, fn, default_flow_style=False) - else: - raise TypeError('Expected str or _io.TextIOWrapper not %s' % (type(fn))) - - to_yaml = to_yml - - def to_json(self) -> Dict: - """ - Returns the whole config file - - Returns: - - """ - return self.config - - # def __getitem__(self, item): - # return self.get_config(item) - - -class ConfigurationError(Exception): - def __init__(self, message): - # Call the base class constructor with the parameters it needs - super().__init__(message) diff --git a/youbot/configuration/yml_schema.json b/youbot/configuration/yml_schema.json deleted file mode 100644 index b3e6c14..0000000 --- a/youbot/configuration/yml_schema.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "title": "Python Configuration", - "description": "A json for python configuration in yml format", - "type": "object", - "properties": { - "tag": { - "type": "string" - } - }, - "required": [ - "tag" - ], - "definitions": { - }, - "additionalProperties": true -} \ No newline at end of file diff --git a/youbot/configuration/yml_schema_strict.json b/youbot/configuration/yml_schema_strict.json deleted file mode 100644 index 0856d43..0000000 --- a/youbot/configuration/yml_schema_strict.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "type": "object", - "properties": { - "tag": { - "type": "string" - }, - "example_db": { - "$ref": "#/definitions/example_db" - } - }, - "required": [ - "tag", - "example_db" - ], - "definitions": { - "example_db": { - "type": "array", - "items": { - "type": "object", - "required": [ - "type", - "properties" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "mysql", - "mongodb" - ] - }, - 
"properties": { - "type": "object", - "additionalProperties": false, - "required": [ - "hostname", - "username", - "password", - "db_name" - ], - "properties": { - "hostname": { - "type": "string" - }, - "username": { - "type": "string" - }, - "password": { - "type": "string" - }, - "db_name": { - "type": "string" - }, - "port": { - "type": "integer" - } - } - } - } - }, - "additionalProperties": false - } - }, - "additionalProperties": false -} \ No newline at end of file diff --git a/youbot/datastore/__init__.py b/youbot/datastore/__init__.py deleted file mode 100644 index 6918bc4..0000000 --- a/youbot/datastore/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Cloudstore sub-package of YoutubeCommentBot.""" - -from .mysql_datastore import YoutubeMySqlDatastore - -__author__ = "drkostas" -__email__ = "georgiou.kostas94@gmail.com" -__version__ = "2.0" diff --git a/youbot/datastore/abstract_datastore.py b/youbot/datastore/abstract_datastore.py deleted file mode 100644 index bde2319..0000000 --- a/youbot/datastore/abstract_datastore.py +++ /dev/null @@ -1,59 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List, Dict - - -class AbstractDatastore(ABC): - __slots__ = ('_connection', '_cursor') - - @abstractmethod - def __init__(self, config: Dict) -> None: - """ - Tha basic constructor. 
Creates a new instance of Datastore using the specified credentials - - :param config: - """ - - self._connection, self._cursor = self.get_connection(username=config['username'], - password=config['password'], - hostname=config['hostname'], - db_name=config['db_name'], - port=config['port']) - - @staticmethod - @abstractmethod - def get_connection(username: str, password: str, hostname: str, db_name: str, port: int): - pass - - @abstractmethod - def create_table(self, table: str, schema: str): - pass - - @abstractmethod - def drop_table(self, table: str) -> None: - pass - - @abstractmethod - def truncate_table(self, table: str) -> None: - pass - - @abstractmethod - def insert_into_table(self, table: str, data: dict) -> None: - pass - - @abstractmethod - def update_table(self, table: str, set_data: dict, where: str) -> None: - pass - - @abstractmethod - def select_from_table(self, table: str, columns: str = '*', where: str = 'TRUE', - order_by: str = 'NULL', - asc_or_desc: str = 'ASC', limit: int = 1000) -> List: - pass - - @abstractmethod - def delete_from_table(self, table: str, where: str) -> None: - pass - - @abstractmethod - def show_tables(self, *args, **kwargs) -> List: - pass diff --git a/youbot/datastore/mysql_datastore.py b/youbot/datastore/mysql_datastore.py deleted file mode 100644 index be6303f..0000000 --- a/youbot/datastore/mysql_datastore.py +++ /dev/null @@ -1,524 +0,0 @@ -from typing import List, Tuple, Dict - -from datetime import datetime -from mysql import connector as mysql_connector -import mysql.connector.cursor - -from .abstract_datastore import AbstractDatastore -from youbot import ColorizedLogger - -logger = ColorizedLogger('MySqlDataStore') - - -class MySqlDatastore(AbstractDatastore): - __slots__ = ('_connection', '_cursor') - - _connection: mysql_connector.MySQLConnection - _cursor: mysql_connector.cursor.MySQLCursor - - def __init__(self, config: Dict) -> None: - """ - The basic constructor. 
Creates a new instance of Datastore using the specified credentials - - :param config: - """ - - super().__init__(config) - - @staticmethod - def get_connection(username: str, password: str, hostname: str, db_name: str, port: int = 3306) \ - -> Tuple[mysql_connector.MySQLConnection, mysql_connector.cursor.MySQLCursor]: - """ - Creates and returns a connection and a cursor/session to the MySQL DB - - :param username: - :param password: - :param hostname: - :param db_name: - :param port: - :return: - """ - - connection = mysql_connector.connect( - host=hostname, - user=username, - passwd=password, - database=db_name, - use_pure=True - ) - - cursor = connection.cursor() - return connection, cursor - - def execute_query(self, query: str, commit: bool = False, - fetchall: bool = False, fetchone: bool = False) -> List[Tuple]: - """ - Execute a query in the DB. - Args: - query: - commit: - fetchall: - fetchone: - """ - - logger.debug("Executing: %s" % query) - try: - self._cursor.execute(query) - if commit: - self.commit() - if fetchall: - return self._cursor.fetchall() - if fetchone: - return self._cursor.fetchone() - except mysql.connector.errors.ProgrammingError as e: - logger.error(f'MySQL Error: {e}') - logger.error(f'Full Query: {query}') - - def create_table(self, table: str, schema: str) -> None: - """ - Creates a table using the specified schema - - :param self: - :param table: - :param schema: - :return: - """ - - query = "CREATE TABLE IF NOT EXISTS {table} ({schema})".format(table=table, schema=schema) - self.execute_query(query, commit=True) - - def drop_table(self, table: str) -> None: - """ - Drops the specified table if it exists - - :param self: - :param table: - :return: - """ - - query = "DROP TABLE IF EXISTS {table}".format(table=table) - self.execute_query(query, commit=True) - - def truncate_table(self, table: str) -> None: - """ - Truncates the specified table - - :param self: - :param table: - :return: - """ - - query = "TRUNCATE TABLE 
{table}".format(table=table) - self.execute_query(query, commit=True) - - def insert_into_table(self, table: str, data: dict, if_not_exists: bool = False) -> None: - """ - Inserts into the specified table a row based on a column_name: value dictionary - - :param self: - :param table: - :param data: - :param if_not_exists: - :return: - """ - - data_str = ", ".join( - list(map(lambda key, val: "{key}='{val}'".format(key=str(key), val=str(val)), data.keys(), - data.values()))) - if if_not_exists: - ignore = 'IGNORE' - else: - ignore = '' - query = f"INSERT {ignore} INTO {table} SET {data_str}" - self.execute_query(query, commit=True) - - def update_table(self, table: str, set_data: dict, where: str) -> None: - """ - Updates the specified table using a column_name: value dictionary and a where statement - - :param self: - :param table: - :param set_data: - :param where: - :return: - """ - - set_data_str = ", ".join( - list(map(lambda key, val: "{key}='{val}'".format(key=str(key), val=str(val)), - set_data.keys(), - set_data.values()))) - - query = "UPDATE {table} SET {data} WHERE {where}".format(table=table, data=set_data_str, - where=where) - self.execute_query(query, commit=True) - - def select_from_table(self, table: str, columns: str = '*', where: str = 'TRUE', - order_by: str = 'NULL', asc_or_desc: str = 'ASC', limit: int = 1000, - group_by: str = '', having: str = '') -> List[Tuple]: - """ - Selects from a specified table based on the given columns, where, ordering and limit - - Args: - table: - columns: - where: - order_by: - asc_or_desc: - limit: - group_by: - having: - """ - - # Construct Group By - if group_by: - if having: - having = f'HAVING {having}' - group_by = f'GROUP BY {group_by} {having} ' - - # Build the Query - query = f"SELECT {columns} " \ - f"FROM {table} " \ - f"WHERE {where} " \ - f"{group_by}" \ - f"ORDER BY {order_by} {asc_or_desc} " \ - f"LIMIT {limit}" - - results = self.execute_query(query, fetchall=True) - - return results - - def 
select_join(self, left_table: str, right_table: str, - join_key_left: str, join_key_right: str, - left_columns: str = '', right_columns: str = '', custom_columns: str = '', - join_type: str = 'INNER', - where: str = 'TRUE', order_by: str = 'NULL', asc_or_desc: str = 'ASC', - limit: int = 1000, group_by: str = '', having: str = '') -> List[Tuple]: - """ - Join two tables and select. - - Args: - left_table: - right_table: - left_columns: - right_columns: - custom_columns: Custom columns for which no `l.` or `r.` will be added automatically - join_key_left: The column of join of the left table - join_key_right: The column of join of the right table - join_type: OneOf(INNER, LEFT, RIGHT) - where: Add a `l.` or `.r` before the specified columns - order_by: Add a `l.` or `.r` before the specified columns - asc_or_desc: - limit: - group_by: Add a `l.` or `.r` before the specified columns - having: Add a `l.` or `.r` before the specified columns - """ - - # Construct Group By - if group_by: - if having: - having = f'HAVING {having}' - group_by = f'GROUP BY {group_by} {having} ' - - # Construct Columns - if left_columns: - left_columns = 'l.' + ', l.'.join(map(str.strip, left_columns.split(','))) - if right_columns or custom_columns: - left_columns += ', ' - if right_columns: - right_columns = 'r.' 
+ ', r.'.join(map(str.strip, right_columns.split(','))) - if custom_columns: - right_columns += ', ' - columns = f'{left_columns} {right_columns} {custom_columns}' - - # Build the Query - query = f"SELECT {columns} " \ - f"FROM {left_table} l " \ - f"{join_type} JOIN {right_table} r " \ - f"ON l.{join_key_left}=r.{join_key_right} " \ - f"WHERE {where} " \ - f"{group_by}" \ - f"ORDER BY {order_by} {asc_or_desc} " \ - f"LIMIT {limit}" - - print(query) - results = self.execute_query(query, fetchall=True) - - return results - - def delete_from_table(self, table: str, where: str) -> None: - """ - Deletes data from the specified table based on a where statement - - :param self: - :param table: - :param where: - :return: - """ - - query = "DELETE FROM {table} WHERE {where}".format(table=table, where=where) - self.execute_query(query, commit=True) - - def show_tables(self) -> List: - """ - Show a list of the tables present in the db - :return: - """ - - query = 'SHOW TABLES' - results = self.execute_query(query, fetchall=True) - - return [result[0] for result in results] - - def commit(self) -> None: - self._connection.commit() - - def close_connection(self) -> None: - """ - Flushes and closes the connection - - :return: - """ - - self.commit() - self._cursor.close() - - __exit__ = close_connection - - -class YoutubeMySqlDatastore(MySqlDatastore): - CHANNEL_TABLE = 'channels' - COMMENTS_TABLE = 'comments' - - def __init__(self, config: Dict) -> None: - """ - The basic constructor. 
Creates a new instance of Datastore using the specified credentials - - :param config: - """ - - super().__init__(config) - self.create_tables_if_not_exist() - - def create_tables_if_not_exist(self): - channels_schema = \ - """ - channel_id varchar(100) default '' not null, - username varchar(100) not null, - added_on varchar(100) not null, - last_commented varchar(100) not null, - priority int auto_increment, - channel_photo varchar(100) default '-1' null, - constraint id_pk PRIMARY KEY (channel_id), - constraint channel_id unique (channel_id), - constraint priority unique (priority), - constraint username unique (username)""" - comments_schema = \ - """ - channel_id varchar(100) not null, - video_link varchar(100) not null, - comment varchar(255) not null, - comment_time varchar(100) not null, - like_count int default -1 null, - reply_count int default -1 null, - comment_id varchar(100) default '-1' null, - video_id varchar(100) default '-1' null, - comment_link varchar(100) default '-1' null, - constraint video_link_pk PRIMARY KEY (video_link), - constraint video_link unique (video_link), - constraint channel_id foreign key (channel_id) references channels (channel_id) on update cascade on delete cascade""" - - self.create_table(table=self.CHANNEL_TABLE, schema=channels_schema) - self.create_table(table=self.COMMENTS_TABLE, schema=comments_schema) - - def get_channels(self) -> List[Dict]: - """ Retrieve all channels from the database. 
""" - - result = self.select_from_table(table=self.CHANNEL_TABLE, order_by='priority') - for row in result: - yield self._table_row_to_channel_dict(row) - - def add_channel(self, channel_data: Dict) -> None: - """ Insert the provided channel into the database""" - - try: - self.insert_into_table(table=self.CHANNEL_TABLE, data=channel_data, if_not_exists=True) - except mysql.connector.errors.IntegrityError as e: - logger.error(f"MySQL error: {e}") - - def get_channel_by_id(self, ch_id: str) -> Tuple: - """Retrieve a channel from the database by its ID - - Args: - ch_id (str): The channel ID - """ - - where_statement = f"id='{ch_id}'" - result = self.select_from_table(table=self.CHANNEL_TABLE, where=where_statement) - if len(result) > 1: - logger.warning("Duplicate channel retrieved from SELECT statement:{result}") - elif len(result) == 0: - result.append(()) - - return result[0] - - def get_channel_by_username(self, ch_username: str) -> Tuple: - """Retrieve a channel from the database by its Username - - Args: - ch_username (str): The channel ID - """ - - where_statement = f"username='{ch_username}'" - result = self.select_from_table(table=self.CHANNEL_TABLE, where=where_statement) - if len(result) > 1: - logger.warning("Duplicate channel retrieved from SELECT statement:{result}") - elif len(result) == 0: - result.append(()) - - return result[0] - - def remove_channel_by_id(self, ch_id: str) -> None: - """Retrieve a channel from the database by its ID - - Args: - ch_id (str): The channel ID - """ - - where_statement = f"id='{ch_id}'" - self.delete_from_table(table=self.CHANNEL_TABLE, where=where_statement) - - def remove_channel_by_username(self, ch_username: str) -> None: - """Delete a channel from the database by its Username - - Args: - ch_username (str): The channel ID - """ - - where_statement = f"username='{ch_username}'" - self.delete_from_table(table=self.CHANNEL_TABLE, where=where_statement) - - def update_channel_photo(self, channel_id: str, photo_url: 
str) -> None: - """ - Update the profile picture link of a channel. - Args: - channel_id: - photo_url: - """ - - set_data = {'channel_photo': photo_url} - self.update_table(table=self.CHANNEL_TABLE, - set_data=set_data, - where=f"channel_id='{channel_id}'") - - def add_comment(self, ch_id: str, video_link: str, comment_text: str) -> None: - """ TODO: check the case where a comment contains single quotes - Add comment data and update the `last_commented` channel column. - - Args: - ch_id: - video_link: - comment_text: - """ - - datetime_now = datetime.utcnow().isoformat() - comments_data = {'channel_id': ch_id, - 'video_link': video_link, - 'comment': comment_text, - 'comment_time': datetime_now} - update_data = {'last_commented': datetime_now} - where_statement = f"channel_id='{ch_id}'" - - try: - self.insert_into_table(self.COMMENTS_TABLE, data=comments_data) - # Update Channel's last_commented timestamp - self.update_table(table=self.CHANNEL_TABLE, set_data=update_data, where=where_statement) - except mysql.connector.errors.IntegrityError as e: - logger.error(f"MySQL Error: {e}") - - def get_comments(self, n_recent: int = 50, min_likes: int = -1, - min_replies: int = -1) -> List[Dict]: - """ - Get the latest n_recent comments from the comments table. 
- Args: - n_recent: - min_likes: - min_replies: - """ - - comment_cols = 'video_link, comment, comment_time, like_count, reply_count, comment_link' - channel_cols = 'username, channel_photo' - where = f'l.like_count>={min_likes} AND l.reply_count>={min_replies} ' - for comment in self.select_join(left_table=self.COMMENTS_TABLE, - right_table=self.CHANNEL_TABLE, - left_columns=comment_cols, - right_columns=channel_cols, - join_key_left='channel_id', - join_key_right='channel_id', - where=where, - order_by='l.comment_time', - asc_or_desc='desc', - limit=n_recent): - yield self._table_row_to_comment_dict(comment) - - def update_comment(self, video_link: str, comment_id: str, - like_cnt: int, reply_cnt: int) -> None: - """ - Populate a comment entry with additional information. - Args: - video_link: - comment_id: - like_cnt: - reply_cnt: - """ - - # Get video id - video_id = video_link.split('v=')[1].split('&')[0] - # Create Comment Link - comment_link = f'https://youtube.com/watch?v={video_id}&lc={comment_id}' - # Construct the update key-values - set_data = {'comment_link': comment_link, - 'video_id': video_id, - 'comment_id': comment_id, - 'like_count': like_cnt, - 'reply_count': reply_cnt} - # Execute the update command - self.update_table(table=self.COMMENTS_TABLE, - set_data=set_data, - where=f"video_link='{video_link}'") - - @staticmethod - def _table_row_to_channel_dict(row: Tuple) -> Dict: - """Transform a table row into a channel representation - - Args: - row (list): The database row - """ - - channel = dict() - channel['channel_id'] = row[0] - channel['username'] = row[1] - channel['added_on'] = row[2] - channel['last_commented'] = row[3] - channel['priority'] = row[4] - channel['channel_photo'] = row[5] - return channel - - @staticmethod - def _table_row_to_comment_dict(row: Tuple) -> Dict: - """Transform a table row into a channel representation - - Args: - row (list): The database row - """ - - channel = dict() - channel['video_link'] = row[0] - 
channel['comment'] = row[1] - channel['comment_time'] = row[2] - channel['like_count'] = row[3] - channel['reply_count'] = row[4] - channel['comment_link'] = row[5] - channel['username'] = row[6] - channel['channel_photo'] = row[7] - return channel - diff --git a/youbot/emailer/__init__.py b/youbot/emailer/__init__.py deleted file mode 100644 index d7f864f..0000000 --- a/youbot/emailer/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Emailer sub-package of YoutubeCommentBot.""" - -from .gmail_emailer import GmailEmailer - -__author__ = "drkostas" -__email__ = "georgiou.kostas94@gmail.com" -__version__ = "2.0" diff --git a/youbot/emailer/abstract_emailer.py b/youbot/emailer/abstract_emailer.py deleted file mode 100644 index 17b85d7..0000000 --- a/youbot/emailer/abstract_emailer.py +++ /dev/null @@ -1,39 +0,0 @@ -from abc import ABC, abstractmethod - - -class AbstractEmailer(ABC): - __slots__ = ('_handler',) - - @abstractmethod - def __init__(self, *args, **kwargs) -> None: - """ - Tha basic constructor. Creates a new instance of EmailApp using the specified credentials - - """ - - pass - - @staticmethod - @abstractmethod - def get_handler(*args, **kwargs): - """ - Returns an EmailApp handler. - - :param args: - :param kwargs: - :return: - """ - - pass - - @abstractmethod - def send_email(self, *args, **kwargs): - """ - Sends an email with the specified arguments. 
- - :param args: - :param kwargs: - :return: - """ - - pass diff --git a/youbot/emailer/gmail_emailer.py b/youbot/emailer/gmail_emailer.py deleted file mode 100644 index cfb00f9..0000000 --- a/youbot/emailer/gmail_emailer.py +++ /dev/null @@ -1,87 +0,0 @@ -from typing import List, Dict -import logging -from gmail import GMail, Message - -from .abstract_emailer import AbstractEmailer -from youbot import ColorizedLogger - -logger = ColorizedLogger('GmailEmailer') - - -class GmailEmailer(AbstractEmailer): - __slots__ = ('_handler', 'email_address', 'test_mode') - - _handler: GMail - test_mode: bool - - def __init__(self, config: Dict, test_mode: bool = False) -> None: - """ - The basic constructor. Creates a new instance of EmailApp using the specified credentials - - :param config: - :param test_mode: - """ - - self.email_address = config['email_address'] - self._handler = self.get_handler(email_address=self.email_address, - api_key=config['api_key']) - self.test_mode = test_mode - super().__init__() - - @staticmethod - def get_handler(email_address: str, api_key: str) -> GMail: - """ - Returns an EmailApp handler. - - :param email_address: - :param api_key: - :return: - """ - - gmail_handler = GMail(username=email_address, password=api_key) - gmail_handler.connect() - return gmail_handler - - def is_connected(self) -> bool: - return self._handler.is_connected() - - def get_self_email(self): - return self.email_address - - def send_email(self, subject: str, to: List, cc: List = None, bcc: List = None, text: str = None, - html: str = None, - attachments: List = None, sender: str = None, reply_to: str = None) -> None: - """ - Sends an email with the specified arguments. 
- - :param subject: - :param to: - :param cc: - :param bcc: - :param text: - :param html: - :param attachments: - :param sender: - :param reply_to: - :return: - """ - - if self.test_mode: - to = self.email_address - cc = self.email_address if cc is not None else None - bcc = self.email_address if bcc is not None else None - - msg = Message(subject=subject, - to=",".join(to), - cc=",".join(cc) if cc is not None else None, - bcc=",".join(bcc) if cc is not None else None, - text=text, - html=html, - attachments=attachments, - sender=sender, - reply_to=reply_to) - logger.debug("Sending email with Message: %s" % msg) - self._handler.send(msg) - - def __exit__(self): - self._handler.close() diff --git a/youbot/fancy_logger/__init__.py b/youbot/fancy_logger/__init__.py deleted file mode 100644 index 3e7e604..0000000 --- a/youbot/fancy_logger/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""FancyLog sub-package of YoutubeCommentBot.""" - -from .colorized_logger import ColorizedLogger - -__author__ = "drkostas" -__email__ = "georgiou.kostas94@gmail.com" -__version__ = "2.0" - diff --git a/youbot/fancy_logger/abstract_fancy_logger.py b/youbot/fancy_logger/abstract_fancy_logger.py deleted file mode 100644 index 96eea1a..0000000 --- a/youbot/fancy_logger/abstract_fancy_logger.py +++ /dev/null @@ -1,19 +0,0 @@ -from abc import ABC, abstractmethod - - -class AbstractFancyLogger(ABC): - """Abstract class of the FancyLog package""" - - @abstractmethod - def __init__(self, *args, **kwargs) -> None: - """The basic constructor. 
Creates a new instance of FancyLog using the - specified arguments - - Args: - *args: - **kwargs: - """ - - @abstractmethod - def create_logger(self, *args, **kwargs): - pass diff --git a/youbot/fancy_logger/colorized_logger.py b/youbot/fancy_logger/colorized_logger.py deleted file mode 100644 index b588075..0000000 --- a/youbot/fancy_logger/colorized_logger.py +++ /dev/null @@ -1,151 +0,0 @@ -import os -from typing import List, Union -import types -import logging -from termcolor import colored - -from .abstract_fancy_logger import AbstractFancyLogger - - -class ColorizedLogger(AbstractFancyLogger): - """ColorizedLogger class of the FancyLog package""" - - __slots__ = ('_logger', 'logger_name', '_color', '_on_color', '_attrs', - 'debug', 'info', 'warn', 'warning', 'error', 'exception', 'critical') - - log_fmt: str = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' - log_date_fmt: str = '%Y-%m-%d %H:%M:%S' - log_level: Union[int, str] = logging.INFO - _logger: logging.Logger - log_path: str = None - logger_name: str - _color: str - _on_color: str - _attrs: List - - def __init__(self, logger_name: str, - color: str = 'white', on_color: str = None, - attrs: List = None) -> None: - """ - Args: - logger_name (str): - color (str): - attrs (List): AnyOf('bold', 'dark', 'underline', 'blink', 'reverse', 'concealed') - """ - - self._color = color - self._on_color = on_color - self._attrs = attrs if attrs else ['bold'] - self.logger_name = logger_name - self._logger = self.create_logger(logger_name=logger_name) - super().__init__() - - def __getattr__(self, name: str): - """ - Args: - name (str): - """ - - def log_colored(log_text: str, *args, **kwargs): - color = self._color if 'color' not in kwargs else kwargs['color'] - on_color = self._on_color if 'on_color' not in kwargs else kwargs['on_color'] - attrs = self._attrs if 'attrs' not in kwargs else kwargs['attrs'] - colored_text = colored(log_text, color=color, on_color=on_color, attrs=attrs) - return 
getattr(self._logger, name)(colored_text, *args) - - if name in ['debug', 'info', 'warn', 'warning', - 'error', 'exception', 'critical']: - self.add_file_handler_if_needed(self._logger) - return log_colored - elif name in ['newline', 'nl']: - self.add_file_handler_if_needed(self._logger) - return getattr(self._logger, name) - else: - return AbstractFancyLogger.__getattribute__(self, name) - - @staticmethod - def log_newline(self, num_lines=1): - # Switch handler, output a blank line - if hasattr(self, 'main_file_handler') and hasattr(self, 'blank_file_handler'): - self.removeHandler(self.main_file_handler) - self.addHandler(self.blank_file_handler) - self.removeHandler(self.main_streaming_handler) - self.addHandler(self.blank_streaming_handler) - # Print the new lines - for i in range(num_lines): - self.info('') - # Switch back - if hasattr(self, 'main_file_handler') and hasattr(self, 'blank_file_handler'): - self.removeHandler(self.blank_file_handler) - self.addHandler(self.main_file_handler) - self.removeHandler(self.blank_streaming_handler) - self.addHandler(self.main_streaming_handler) - - def add_file_handler_if_needed(self, logger): - if not (hasattr(logger, 'main_file_handler') and hasattr(logger, 'blank_file_handler')) \ - and self.log_path: - # Create a file handler - self.create_logs_folder(self.log_path) - main_file_handler = logging.FileHandler(self.log_path) - main_file_handler.setLevel(self.log_level) - main_file_handler.setFormatter(logging.Formatter(fmt=self.log_fmt, - datefmt=self.log_date_fmt)) - # Create a "blank line" file handler - blank_file_handler = logging.FileHandler(self.log_path) - blank_file_handler.setLevel(self.log_level) - blank_file_handler.setFormatter(logging.Formatter(fmt='')) - # Add file handlers - logger.addHandler(main_file_handler) - logger.main_file_handler = main_file_handler - logger.blank_file_handler = blank_file_handler - return logger - - def create_logger(self, logger_name: str): - # Create a logger, with the 
previously-defined handlers - logger = logging.getLogger(logger_name) - logger.handlers = [] - logger.setLevel(self.log_level) - logger = self.add_file_handler_if_needed(logger) - # Create a streaming handler - main_streaming_handler = logging.StreamHandler() - main_streaming_handler.setLevel(self.log_level) - main_streaming_handler.setFormatter(logging.Formatter(fmt=self.log_fmt, - datefmt=self.log_date_fmt)) - # Create a "blank line" streaming handler - blank_streaming_handler = logging.StreamHandler() - blank_streaming_handler.setLevel(self.log_level) - blank_streaming_handler.setFormatter(logging.Formatter(fmt='')) - # Add streaming handlers - logger.addHandler(main_streaming_handler) - logger.propagate = False - logger.main_streaming_handler = main_streaming_handler - logger.blank_streaming_handler = blank_streaming_handler - # Create the new line method - logger.newline = types.MethodType(self.log_newline, logger) - logger.nl = logger.newline - return logger - - @staticmethod - def create_logs_folder(log_path: str): - log_path = os.path.abspath(log_path).split(os.sep) - log_dir = (os.sep.join(log_path[:-1])) - if not os.path.exists(log_dir): - os.makedirs(log_dir) - - @classmethod - def setup_logger(cls, log_path: str, debug: bool = False, clear_log: bool = False) -> None: - """ Sets-up the basic_logger - - Args: - log_path (str): The path where the log file will be saved - debug (bool): Whether to print debug messages or not - clear_log (bool): Whether to empty the log file or not - """ - cls.log_path = os.path.abspath(log_path) - if clear_log: - open(cls.log_path, 'w').close() - cls.log_level = logging.INFO if debug is not True else logging.DEBUG - fancy_log_logger.info(f"Logger is set. 
Log file path: {cls.log_path}") - - -fancy_log_logger = ColorizedLogger(logger_name='FancyLogger', color='white') diff --git a/youbot/main.py b/youbot/main.py index 49f8df3..11536d2 100644 --- a/youbot/main.py +++ b/youbot/main.py @@ -56,7 +56,7 @@ def get_args() -> argparse.Namespace: def commenter(youtube: YoutubeManager, args: argparse.Namespace) -> None: - raise NotImplementedError() + youtube.commenter() def accumulator(youtube: YoutubeManager, args: argparse.Namespace) -> None: @@ -81,7 +81,7 @@ def list_comments(youtube: YoutubeManager, args: argparse.Namespace) -> None: def refresh_photos(youtube: YoutubeManager, args: argparse.Namespace) -> None: - raise NotImplementedError() + youtube.refresh_photos() def main(): @@ -100,7 +100,7 @@ def main(): db_conf = conf_obj.get_config('datastore')[0] # Setup Youtube API youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, - tag=conf_obj.tag) + sleep_time=you_conf['sleep_time'], tag=conf_obj.tag) # Run in the specified run mode func = globals()[args.run_mode] func(youtube, args) diff --git a/youbot/youtube_utils/__init__.py b/youbot/youtube_utils/__init__.py deleted file mode 100644 index a0b1704..0000000 --- a/youbot/youtube_utils/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Youtube Utils sub-package of YoutubeCommentBot.""" - -from .youtube_manager import YoutubeApiV3, YoutubeManager - -__author__ = "drkostas" -__email__ = "georgiou.kostas94@gmail.com" -__version__ = "2.0" diff --git a/youbot/youtube_utils/youtube_api.py b/youbot/youtube_utils/youtube_api.py deleted file mode 100644 index a3e5002..0000000 --- a/youbot/youtube_utils/youtube_api.py +++ /dev/null @@ -1,372 +0,0 @@ -from typing import List, Tuple, Dict, Union, Any -from abc import ABC, abstractmethod -import os -import re -import math -from datetime import datetime, timedelta, timezone -import dateutil.parser -from oauth2client.file import Storage -from oauth2client.tools import argparser, run_flow -from oauth2client.client import 
OAuth2WebServerFlow -import googleapiclient -from googleapiclient.discovery import build -import httplib2 - -from youbot import ColorizedLogger - -logger = ColorizedLogger('YoutubeApi') - - -class AbstractYoutubeApi(ABC): - __slots__ = ('channel_name', 'channel_id', '_api', 'tag') - - @abstractmethod - def __init__(self, config: Dict, tag: str) -> None: - """ - The basic constructor. Creates a new instance of YoutubeManager using the specified credentials - - :param config: - """ - - self.tag = tag - self._api = self._build_api(**config, tag=self.tag) - self.channel_name, self.channel_id = self._get_my_username_and_id() - - @staticmethod - @abstractmethod - def _build_api(*args, **kwargs): - pass - - @abstractmethod - def _get_my_username_and_id(self) -> str: - pass - - -class YoutubeApiV3(AbstractYoutubeApi): - - def __init__(self, config: Dict, tag: str): - super().__init__(config, tag) - - @staticmethod - def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str, - tag: str) -> googleapiclient.discovery.Resource: - """ - Build a youtube api connection. 
- - Args: - client_id: - client_secret: - api_version: - read_only_scope: - tag: - """ - - flow = OAuth2WebServerFlow(client_id=client_id, - client_secret=client_secret, - scope=read_only_scope) - key_path = os.path.join('..', 'keys', f'{tag}.json') - storage = Storage(key_path) - credentials = storage.get() - - if credentials is None or credentials.invalid: - flags = argparser.parse_args(args=['--noauth_local_webserver']) - credentials = run_flow(flow, storage, flags) - - api = build('youtube', api_version, http=credentials.authorize(httplib2.Http())) - return api - - def _get_my_username_and_id(self) -> Tuple[str, str]: - channels_response = self._api.channels().list( - part="snippet", - fields='items(id,snippet(title))', - mine='true' - ).execute() - if channels_response: - channel_info = self._yt_to_channel_dict(channels_response) - my_username = channel_info['username'] - my_id = channel_info['channel_id'] - else: - error_msg = "Got empty response when trying to get the self username." - logger.error(error_msg) - raise Exception(error_msg) - return my_username, my_id - - def comment(self, video_id: str, comment_text: str) -> None: - - try: - properties = {'snippet.channelId': self.channel_id, - 'snippet.videoId': video_id, - 'snippet.topLevelComment.snippet.textOriginal': comment_text} - self._comment_threads_insert(properties=properties, - part='snippet') - except Exception as exc: - logger.error(f"An error occurred:\n{exc}") - - def get_channel_info_by_username(self, username: str) -> Union[Dict, None]: - """ Queries YouTube for a channel using the specified username. 
- - Args: - username (str): The username to search for - """ - - channels_response = self._api.channels().list( - forUsername=username, - part="snippet", - fields='items(id,snippet(title))' - ).execute() - if channels_response: - channel = self._yt_to_channel_dict(channels_response) - if channel is not None: - channel['username'] = username - else: - logger.warning(f"Got empty response for channel username: {username}") - channel = {} - return channel - - def get_channel_info_by_id(self, channel_id: str) -> Union[Dict, None]: - """ Queries YouTube for a channel using the specified channel id. - - Args: - channel_id (str): The channel ID to search for - """ - - channels_response = self._api.channels().list( - id=channel_id, - part="snippet", - fields='items(id,snippet(title))' - ).execute() - - return self._yt_to_channel_dict(channels_response) - - def get_uploads(self, channels: List, last_n_hours: int = 2) -> Dict: - """ Retrieves new uploads for the specified channels. - - Args: - channels(list): A list with channel IDs - last_n_hours: - """ - - # Separate the channels list in 50-sized channel lists - channels_lists = self.split_list(channels, 50) - channels_to_check = [] - # Get the Playlist IDs of each channel - for channels in channels_lists: - channels_response = self._api.channels().list( - id=",".join(channels), - part="contentDetails,snippet", - fields="items(id,contentDetails(relatedPlaylists(uploads)),snippet(title))" - ).execute() - channels_to_check.extend(channels_response["items"]) - # For each playlist ID, get 50 videos - for channel in channels_to_check: - uploads_list_id = channel["contentDetails"]["relatedPlaylists"]["uploads"] - for upload in self._get_uploads_playlist(uploads_list_id, last_n_hours): - upload['channel_title'] = channel['snippet']['title'] - upload['channel_id'] = channel['id'] - yield upload - - def get_video_comments(self, url: str, search_terms: str = None) -> List: - """ Populates a list with comments (and their replies). 
- - Args: - url: - search_terms: - """ - - if not search_terms: - search_terms = self.channel_name - video_id = re.search(r"^.*(youtu\.be\/|vi?\/|u\/\w\/|embed\/|\?vi?=|\&vi?=)([^#\&\?]*).*", - url).group(2) - page_token = "" # "&pageToken={}".format(page_token) - comment_threads_response = self._api.commentThreads().list( - part="snippet", - maxResults=100, - videoId="{}{}".format(video_id, page_token), - searchTerms=search_terms - ).execute() - - comments = [] - for comment_thread in comment_threads_response['items']: - channel_name = comment_thread['snippet']['topLevelComment']['snippet']['authorDisplayName'] - if channel_name == self.channel_name: - current_comment = {"url": url, "video_id": video_id, - "comment_id": comment_thread['id'], - "like_count": - comment_thread['snippet']['topLevelComment']['snippet'][ - 'likeCount'], - "reply_count": comment_thread['snippet']['totalReplyCount']} - comments.append(current_comment) - - return comments - - def get_profile_pictures(self, channels: List = None) -> List[Tuple[str, str]]: - """ Gets the profile picture urls for a list of channel ids (or for the self channel). - - Args: - channels: - - Returns: - profile_pictures: [(channel_id, thumbnail_url), ..] 
- """ - - if channels is None: - profile_pictures_request = self._api.channels().list( - mine="true", - part="snippet", - fields='items(id,snippet(thumbnails(default)))' - ) - else: - profile_pictures_request = self._api.channels().list( - id=",".join(channels), - part="snippet", - fields='items(id,snippet(thumbnails(default)))' - ) - - profile_pictures_response = profile_pictures_request.execute() - - profile_pictures_result = [] - for profile_picture in profile_pictures_response["items"]: - profile_pictures_result.append( - (profile_picture["id"], profile_picture["snippet"]["thumbnails"]["default"]["url"])) - - return profile_pictures_result - - @staticmethod - def _yt_to_channel_dict(response: Dict) -> Union[Dict, None]: - """ - Transforms a YouTube API response into a channel Dict. - - Args: - response: - """ - - for channel in response['items']: - result = dict() - result['channel_id'] = channel['id'] - result['username'] = channel['snippet']['title'] - result['added_on'] = datetime.utcnow().isoformat() - result['last_commented'] = (datetime.utcnow() - timedelta(days=1)).isoformat() - return result - return None - - @staticmethod - def split_list(input_list: List, chunk_size: int) -> List: - """ - Split a list into `chunk_size` sub-lists. - - Args: - input_list: - chunk_size: - """ - - chunks = math.ceil(len(input_list) / chunk_size) - if chunks == 1: - output_list = [input_list] - else: - output_list = [] - end = 0 - for i in range(chunks - 1): - start = i * chunk_size - end = (i + 1) * chunk_size - output_list.append(input_list[start:end]) - output_list.append(input_list[end:]) - - return output_list - - def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> Dict: - """ Retrieves uploads using the specified playlist ID which were have been added - since the last check. 
- - Args: - uploads_list_id (str): The ID of the uploads playlist - """ - - # Construct the request - playlist_items_request = self._api.playlistItems().list( - playlistId=uploads_list_id, - part="snippet", - fields='items(id,snippet(title,publishedAt,resourceId(videoId)))', - maxResults=50 - ) - - while playlist_items_request: - playlist_items_response = playlist_items_request.execute() - for playlist_item in playlist_items_response["items"]: - published_at = dateutil.parser.parse(playlist_item['snippet']['publishedAt']) - video = dict() - # Return the video only if it was published in the last `last_n_hours` hours - if published_at >= (datetime.utcnow() - timedelta(hours=last_n_hours)).replace( - tzinfo=timezone.utc): - video['id'] = playlist_item["snippet"]["resourceId"]["videoId"] - video['published_at'] = playlist_item["snippet"]["publishedAt"] - video['title'] = playlist_item["snippet"]["title"] - yield video - else: - return - - playlist_items_request = self._api.playlistItems().list_next( - playlist_items_request, playlist_items_response - ) - - def _comment_threads_insert(self, properties: Dict, **kwargs: Any) -> Dict: - """ Comment using the Youtube API. - Args: - properties: - **kwargs: - """ - - resource = self._build_resource(properties) - kwargs = self._remove_empty_kwargs(**kwargs) - response = self._api.commentThreads().insert(body=resource, **kwargs).execute() - return response - - @staticmethod - def _build_resource(properties: Dict) -> Dict: - """ Build a resource based on a list of properties given as key-value pairs. - Leave properties with empty values out of the inserted resource. """ - - resource = {} - for p in properties: - # Given a key like "snippet.title", split into "snippet" and "title", where - # "snippet" will be an object and "title" will be a property in that object. 
- prop_array = p.split('.') - ref = resource - for pa in range(0, len(prop_array)): - is_array = False - key = prop_array[pa] - # For properties that have array values, convert a name like - # "snippet.tags[]" to snippet.tags, and set a flag to handle - # the value as an array. - if key[-2:] == '[]': - key = key[0:len(key) - 2:] - is_array = True - if pa == (len(prop_array) - 1): - # Leave properties without values out of inserted resource. - if properties[p]: - if is_array: - ref[key] = properties[p].split(',') - else: - ref[key] = properties[p] - elif key not in ref: - # For example, the property is "snippet.title", but the resource does - # not yet have a "snippet" object. Create the snippet object here. - # Setting "ref = ref[key]" means that in the next time through the - # "for pa in range ..." loop, we will be setting a property in the - # resource's "snippet" object. - ref[key] = {} - ref = ref[key] - else: - # For example, the property is "snippet.description", and the resource - # already has a "snippet" object. - ref = ref[key] - return resource - - @staticmethod - def _remove_empty_kwargs(**kwargs: Any) -> Dict: - """ Remove keyword arguments that are not set. 
""" - good_kwargs = {} - if kwargs is not None: - for key, value in kwargs.items(): - if value: - good_kwargs[key] = value - return good_kwargs diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py deleted file mode 100644 index 64d5422..0000000 --- a/youbot/youtube_utils/youtube_manager.py +++ /dev/null @@ -1,99 +0,0 @@ -from typing import List, Tuple, Dict, Union, Any -import arrow - -from youbot import ColorizedLogger, YoutubeMySqlDatastore -from .youtube_api import YoutubeApiV3 - -logger = ColorizedLogger('YoutubeManager') - - -class YoutubeManager(YoutubeApiV3): - __slots__ = ('db',) - - def __init__(self, config: Dict, db_conf: Dict, tag: str): - self.db = YoutubeMySqlDatastore(config=db_conf['config']) - super().__init__(config, tag) - - def add_channel(self, channel_id: str = None, username: str = None) -> None: - if channel_id: - channel_info = self.get_channel_info_by_id(channel_id) - elif username: - channel_info = self.get_channel_info_by_username(username) - else: - raise YoutubeManagerError("You should either pass channel id or username " - "to add channel!") - if channel_info: - self.db.add_channel(channel_data=channel_info) - logger.info(f"Channel `{channel_info['username']}` successfully added!") - else: - raise YoutubeManagerError("Channel not found!") - - def remove_channel(self, channel_id: str = None, username: str = None) -> None: - if channel_id: - self.db.remove_channel_by_id(channel_id) - logger.info(f"Channel `{channel_id}` successfully removed!") - elif username: - self.db.remove_channel_by_username(username) - logger.info(f"Channel `{username}` successfully removed!") - else: - raise YoutubeManagerError("You should either pass channel id or username " - "to remove channel!") - - def list_channels(self) -> None: - channels = [(row["channel_id"], row["username"].title(), - arrow.get(row["added_on"]).humanize(), - arrow.get(row["last_commented"]).humanize()) - for row in self.db.get_channels()] - - 
headers = ['Channel Id', 'Channel Name', 'Added On', 'Last Commented'] - self.pretty_print(headers, channels) - - def list_comments(self, n_recent: int = 50, min_likes: int = -1, - min_replies: int = -1) -> None: - - comments = [(row["username"].title(), row["comment"], - arrow.get(row["comment_time"]).humanize(), - row["like_count"], row["reply_count"], row["comment_link"]) - for row in self.db.get_comments(n_recent, min_likes, min_replies)] - - headers = ['Channel', 'Comment', 'Time', 'Likes', 'Replies', 'Comment URL'] - self.pretty_print(headers, comments) - - @staticmethod - def pretty_print(headers: List[str], data: List[Tuple]): - """Print the provided header and data in a visually pleasing manner - - Args: - headers: The headers to print - data: The data rows - """ - - print_str = "\n" - if len(data) == 0: - return - - separators = [] - for word in headers: - separators.append('-' * len(word)) - - output = [headers, separators] + data - - col_widths = [0] * len(headers) - for row in output: - for idx, column in enumerate(row): - if len(str(column)) > 100: - row[idx] = row[idx][:94] + " (...)" - if len(str(row[idx])) > col_widths[idx]: - col_widths[idx] = len(row[idx]) - - for row in output: - for idx, column in enumerate(row): - column = str(column) - print_str += "".join(column.ljust(col_widths[idx])) + " " - print_str += '\n' - logger.info(print_str) - - -class YoutubeManagerError(Exception): - def __init__(self, message): - super().__init__(message) diff --git a/youbot/yt_mysql.py b/youbot/yt_mysql.py new file mode 100644 index 0000000..6b6e6bd --- /dev/null +++ b/youbot/yt_mysql.py @@ -0,0 +1,235 @@ +from youbot import ColorLogger, HighMySQL +from typing import * +from datetime import datetime + +logger = ColorLogger('YoutubeMySqlDatastore') + + +class YoutubeMySqlDatastore(HighMySQL): + CHANNEL_TABLE = 'channels' + COMMENTS_TABLE = 'comments' + + def __init__(self, config: Dict) -> None: + """ + The basic constructor. 
Creates a new instance of Datastore using the specified credentials + :param config: + """ + + super().__init__(config) + self.create_tables_if_not_exist() + + def create_tables_if_not_exist(self): + channels_schema = \ + """ + channel_id varchar(100) default '' not null, + username varchar(100) not null, + added_on varchar(100) not null, + last_commented varchar(100) not null, + priority int auto_increment, + channel_photo varchar(100) default '-1' null, + constraint id_pk PRIMARY KEY (channel_id), + constraint channel_id unique (channel_id), + constraint priority unique (priority), + constraint username unique (username)""" + comments_schema = \ + """ + channel_id varchar(100) not null, + video_link varchar(100) not null, + comment varchar(255) not null, + comment_time varchar(100) not null, + like_count int default -1 null, + reply_count int default -1 null, + comment_id varchar(100) default '-1' null, + video_id varchar(100) default '-1' null, + comment_link varchar(100) default '-1' null, + constraint video_link_pk PRIMARY KEY (video_link), + constraint video_link unique (video_link), + constraint channel_id foreign key (channel_id) references channels (channel_id) on update cascade on delete cascade""" + + self.create_table(table=self.CHANNEL_TABLE, schema=channels_schema) + self.create_table(table=self.COMMENTS_TABLE, schema=comments_schema) + + def get_channels(self) -> List[Dict]: + """ Retrieve all channels from the database. 
""" + + result = self.select_from_table(table=self.CHANNEL_TABLE, order_by='priority') + for row in result: + yield self._table_row_to_channel_dict(row) + + def add_channel(self, channel_data: Dict) -> None: + """ Insert the provided channel into the database""" + + try: + self.insert_into_table(table=self.CHANNEL_TABLE, data=channel_data, if_not_exists=True) + except HighMySQL.mysql.connector.errors.IntegrityError as e: + logger.error(f"MySQL error: {e}") + + def get_channel_by_id(self, ch_id: str) -> Tuple: + """Retrieve a channel from the database by its ID + Args: + ch_id (str): The channel ID + """ + + where_statement = f"id='{ch_id}'" + result = self.select_from_table(table=self.CHANNEL_TABLE, where=where_statement) + if len(result) > 1: + logger.warning("Duplicate channel retrieved from SELECT statement:{result}") + elif len(result) == 0: + result.append(()) + + return result[0] + + def get_channel_by_username(self, ch_username: str) -> Tuple: + """Retrieve a channel from the database by its Username + Args: + ch_username (str): The channel ID + """ + + where_statement = f"username='{ch_username}'" + result = self.select_from_table(table=self.CHANNEL_TABLE, where=where_statement) + if len(result) > 1: + logger.warning("Duplicate channel retrieved from SELECT statement:{result}") + elif len(result) == 0: + result.append(()) + + return result[0] + + def remove_channel_by_id(self, ch_id: str) -> None: + """Retrieve a channel from the database by its ID + Args: + ch_id (str): The channel ID + """ + + where_statement = f"id='{ch_id}'" + self.delete_from_table(table=self.CHANNEL_TABLE, where=where_statement) + + def remove_channel_by_username(self, ch_username: str) -> None: + """Delete a channel from the database by its Username + Args: + ch_username (str): The channel ID + """ + + where_statement = f"username='{ch_username}'" + self.delete_from_table(table=self.CHANNEL_TABLE, where=where_statement) + + def update_channel_photo(self, channel_id: str, photo_url: 
str) -> None: + """ + Update the profile picture link of a channel. + Args: + channel_id: + photo_url: + """ + + set_data = {'channel_photo': photo_url} + self.update_table(table=self.CHANNEL_TABLE, + set_data=set_data, + where=f"channel_id='{channel_id}'") + + def add_comment(self, ch_id: str, video_link: str, comment_text: str) -> None: + """ TODO: check the case where a comment contains single quotes + Add comment data and update the `last_commented` channel column. + Args: + ch_id: + video_link: + comment_text: + """ + + datetime_now = datetime.utcnow().isoformat() + comments_data = {'channel_id': ch_id, + 'video_link': video_link, + 'comment': comment_text, + 'comment_time': datetime_now} + update_data = {'last_commented': datetime_now} + where_statement = f"channel_id='{ch_id}'" + + try: + self.insert_into_table(self.COMMENTS_TABLE, data=comments_data) + # Update Channel's last_commented timestamp + self.update_table(table=self.CHANNEL_TABLE, set_data=update_data, where=where_statement) + except HighMySQL.mysql.connector.errors.IntegrityError as e: + logger.error(f"MySQL Error: {e}") + + def get_comments(self, n_recent: int = 50, min_likes: int = -1, + min_replies: int = -1) -> List[Dict]: + """ + Get the latest n_recent comments from the comments table. 
+ Args: + n_recent: + min_likes: + min_replies: + """ + + comment_cols = 'video_link, comment, comment_time, like_count, reply_count, comment_link' + channel_cols = 'username, channel_photo' + where = f'l.like_count>={min_likes} AND l.reply_count>={min_replies} ' + for comment in self.select_join(left_table=self.COMMENTS_TABLE, + right_table=self.CHANNEL_TABLE, + left_columns=comment_cols, + right_columns=channel_cols, + join_key_left='channel_id', + join_key_right='channel_id', + where=where, + order_by='l.comment_time', + asc_or_desc='desc', + limit=n_recent): + yield self._table_row_to_comment_dict(comment) + + def update_comment(self, video_link: str, comment_id: str, + like_cnt: int, reply_cnt: int) -> None: + """ + Populate a comment entry with additional information. + Args: + video_link: + comment_id: + like_cnt: + reply_cnt: + """ + + # Get video id + video_id = video_link.split('v=')[1].split('&')[0] + # Create Comment Link + comment_link = f'https://youtube.com/watch?v={video_id}&lc={comment_id}' + # Construct the update key-values + set_data = {'comment_link': comment_link, + 'video_id': video_id, + 'comment_id': comment_id, + 'like_count': like_cnt, + 'reply_count': reply_cnt} + # Execute the update command + self.update_table(table=self.COMMENTS_TABLE, + set_data=set_data, + where=f"video_link='{video_link}'") + + @staticmethod + def _table_row_to_channel_dict(row: Tuple) -> Dict: + """Transform a table row into a channel representation + Args: + row (list): The database row + """ + + channel = dict() + channel['channel_id'] = row[0] + channel['username'] = row[1] + channel['added_on'] = row[2] + channel['last_commented'] = row[3] + channel['priority'] = row[4] + channel['channel_photo'] = row[5] + return channel + + @staticmethod + def _table_row_to_comment_dict(row: Tuple) -> Dict: + """Transform a table row into a channel representation + Args: + row (list): The database row + """ + + channel = dict() + channel['video_link'] = row[0] + 
channel['comment'] = row[1] + channel['comment_time'] = row[2] + channel['like_count'] = row[3] + channel['reply_count'] = row[4] + channel['comment_link'] = row[5] + channel['username'] = row[6] + channel['channel_photo'] = row[7] + return channel From 006b3b304081dff06d5e994a6d6411bd7cd768c0 Mon Sep 17 00:00:00 2001 From: drkostas Date: Thu, 26 May 2022 19:03:12 -0400 Subject: [PATCH 15/33] Gitignore addition --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 682cef8..0d19e03 100644 --- a/.gitignore +++ b/.gitignore @@ -142,4 +142,7 @@ dmypy.json *.tar *.bz2 *.zip -*.7z \ No newline at end of file +*.7z + +# keys +/keys/*.json From 8b48c474e45e0b23d039c2cec11180856395e5a1 Mon Sep 17 00:00:00 2001 From: drkostas Date: Fri, 27 May 2022 17:11:38 -0400 Subject: [PATCH 16/33] Added utils sub-package, Bug fixes, Updated lib versions #4 --- Makefile | 4 +- TODO.md | 2 +- requirements.txt | 18 +- youbot/youtube_utils/__init__.py | 7 + youbot/youtube_utils/youtube_api.py | 372 ++++++++++++++++++++++++ youbot/youtube_utils/youtube_manager.py | 160 ++++++++++ 6 files changed, 551 insertions(+), 12 deletions(-) create mode 100644 youbot/youtube_utils/__init__.py create mode 100644 youbot/youtube_utils/youtube_api.py create mode 100644 youbot/youtube_utils/youtube_manager.py diff --git a/Makefile b/Makefile index 75d6871..0dba176 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,7 @@ install: @echo "To activate the conda environment run:" @echo ' conda activate youbot' clean: - $(PYTHON_BIN)python setup.py clean + $(BIN)/python setup.py clean delete_env: @echo "Deleting virtual environment.." eval $(DELETE_COMMAND) @@ -79,7 +79,7 @@ create_env: @echo "Creating virtual environment.." 
eval $(CREATE_COMMAND) requirements: - pip install -r requirements.txt + $(BIN)/pip install -r requirements.txt setup: $(BIN)/pip install setuptools $(BIN)/python setup.py install $(SETUP_FLAG) diff --git a/TODO.md b/TODO.md index c3b16a9..a42854d 100644 --- a/TODO.md +++ b/TODO.md @@ -5,7 +5,7 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Build YouTube Manager class - [X] Create child MySQL class - [X] Integrate YoutubeMysql class into the YoutubeManager class -- [ ] Use the pypi packages I have created instead of the local ones +- [X] Use the pypi packages I have created instead of the local ones - [ ] Create the workflow for the commenter - [ ] Roll the comments for each channel - store comments in sql table? - [ ] Create table with errors diff --git a/requirements.txt b/requirements.txt index 762619b..f29b005 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,13 @@ -arrow~=1.1.1 -bs4 -google-api-python-client~=2.7.0 -google-auth-oauthlib~=0.4.4 -httplib2~=0.19.1 +arrow>=1.2.2 +bs4>=0.0.1 +google-api-python-client>=2.49.0 +google-auth-oauthlib>=0.5.1 +httplib2>=0.20.4 oauth2client~=4.1.3 -python-dateutil~=2.8.1 -requests~=2.25.1 -setuptools>=52.0.0 -tqdm +python-dateutil>=2.8.2 +requests>=2.27.1 +setuptools>=62.3.2 +tqdm>=4.64.0 yaml-config-wrapper==1.0.4 termcolor-logger==1.0.3 high_sql==1.0.2 diff --git a/youbot/youtube_utils/__init__.py b/youbot/youtube_utils/__init__.py new file mode 100644 index 0000000..a0b1704 --- /dev/null +++ b/youbot/youtube_utils/__init__.py @@ -0,0 +1,7 @@ +"""Youtube Utils sub-package of YoutubeCommentBot.""" + +from .youtube_manager import YoutubeApiV3, YoutubeManager + +__author__ = "drkostas" +__email__ = "georgiou.kostas94@gmail.com" +__version__ = "2.0" diff --git a/youbot/youtube_utils/youtube_api.py b/youbot/youtube_utils/youtube_api.py new file mode 100644 index 0000000..2b114fc --- /dev/null +++ b/youbot/youtube_utils/youtube_api.py @@ -0,0 +1,372 @@ +from typing import List, Tuple, 
Dict, Union, Any +from abc import ABC, abstractmethod +import os +import re +import math +from datetime import datetime, timedelta, timezone +import dateutil.parser +from oauth2client.file import Storage +from oauth2client.tools import argparser, run_flow +from oauth2client.client import OAuth2WebServerFlow +import googleapiclient +from googleapiclient.discovery import build +import httplib2 + +from youbot import ColorLogger + +logger = ColorLogger('YoutubeApi') + + +class AbstractYoutubeApi(ABC): + __slots__ = ('channel_name', 'channel_id', '_api', 'tag') + + @abstractmethod + def __init__(self, config: Dict, tag: str) -> None: + """ + The basic constructor. Creates a new instance of YoutubeManager using the specified credentials + + :param config: + """ + + self.tag = tag + self._api = self._build_api(**config, tag=self.tag) + self.channel_name, self.channel_id = self._get_my_username_and_id() + + @staticmethod + @abstractmethod + def _build_api(*args, **kwargs): + pass + + @abstractmethod + def _get_my_username_and_id(self) -> str: + pass + + +class YoutubeApiV3(AbstractYoutubeApi): + + def __init__(self, config: Dict, tag: str): + super().__init__(config, tag) + + @staticmethod + def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str, + tag: str) -> googleapiclient.discovery.Resource: + """ + Build a youtube api connection. 
+ + Args: + client_id: + client_secret: + api_version: + read_only_scope: + tag: + """ + + flow = OAuth2WebServerFlow(client_id=client_id, + client_secret=client_secret, + scope=read_only_scope) + key_path = os.path.join('../../youbot (3)', 'keys', f'{tag}.json') + storage = Storage(key_path) + credentials = storage.get() + + if credentials is None or credentials.invalid: + flags = argparser.parse_args(args=['--noauth_local_webserver']) + credentials = run_flow(flow, storage, flags) + + api = build('youtube', api_version, http=credentials.authorize(httplib2.Http())) + return api + + def _get_my_username_and_id(self) -> Tuple[str, str]: + channels_response = self._api.channels().list( + part="snippet", + fields='items(id,snippet(title))', + mine='true' + ).execute() + if channels_response: + channel_info = self._yt_to_channel_dict(channels_response) + my_username = channel_info['username'] + my_id = channel_info['channel_id'] + else: + error_msg = "Got empty response when trying to get the self username." + logger.error(error_msg) + raise Exception(error_msg) + return my_username, my_id + + def comment(self, video_id: str, comment_text: str) -> None: + + try: + properties = {'snippet.channelId': self.channel_id, + 'snippet.videoId': video_id, + 'snippet.topLevelComment.snippet.textOriginal': comment_text} + # self._comment_threads_insert(properties=properties, + # part='snippet') + except Exception as exc: + logger.error(f"An error occurred:\n{exc}") + + def get_channel_info_by_username(self, username: str) -> Union[Dict, None]: + """ Queries YouTube for a channel using the specified username. 
+ + Args: + username (str): The username to search for + """ + + channels_response = self._api.channels().list( + forUsername=username, + part="snippet", + fields='items(id,snippet(title))' + ).execute() + if channels_response: + channel = self._yt_to_channel_dict(channels_response) + if channel is not None: + channel['username'] = username + else: + logger.warning(f"Got empty response for channel username: {username}") + channel = {} + return channel + + def get_channel_info_by_id(self, channel_id: str) -> Union[Dict, None]: + """ Queries YouTube for a channel using the specified channel id. + + Args: + channel_id (str): The channel ID to search for + """ + + channels_response = self._api.channels().list( + id=channel_id, + part="snippet", + fields='items(id,snippet(title))' + ).execute() + + return self._yt_to_channel_dict(channels_response) + + def get_uploads(self, channels: List, last_n_hours: int = 2) -> Dict: + """ Retrieves new uploads for the specified channels. + + Args: + channels(list): A list with channel IDs + last_n_hours: + """ + + # Separate the channels list in 50-sized channel lists + channels_lists = self.split_list(channels, 50) + channels_to_check = [] + # Get the Playlist IDs of each channel + for channels in channels_lists: + channels_response = self._api.channels().list( + id=",".join(channels), + part="contentDetails,snippet", + fields="items(id,contentDetails(relatedPlaylists(uploads)),snippet(title))" + ).execute() + channels_to_check.extend(channels_response["items"]) + # For each playlist ID, get 50 videos + for channel in channels_to_check: + uploads_list_id = channel["contentDetails"]["relatedPlaylists"]["uploads"] + for upload in self._get_uploads_playlist(uploads_list_id, last_n_hours): + upload['channel_title'] = channel['snippet']['title'] + upload['channel_id'] = channel['id'] + yield upload + + def get_video_comments(self, url: str, search_terms: str = None) -> List: + """ Populates a list with comments (and their replies). 
+ + Args: + url: + search_terms: + """ + + if not search_terms: + search_terms = self.channel_name + video_id = re.search(r"^.*(youtu\.be\/|vi?\/|u\/\w\/|embed\/|\?vi?=|\&vi?=)([^#\&\?]*).*", + url).group(2) + page_token = "" # "&pageToken={}".format(page_token) + comment_threads_response = self._api.commentThreads().list( + part="snippet", + maxResults=100, + videoId="{}{}".format(video_id, page_token), + searchTerms=search_terms + ).execute() + + comments = [] + for comment_thread in comment_threads_response['items']: + channel_name = comment_thread['snippet']['topLevelComment']['snippet']['authorDisplayName'] + if channel_name == self.channel_name: + current_comment = {"url": url, "video_id": video_id, + "comment_id": comment_thread['id'], + "like_count": + comment_thread['snippet']['topLevelComment']['snippet'][ + 'likeCount'], + "reply_count": comment_thread['snippet']['totalReplyCount']} + comments.append(current_comment) + + return comments + + def get_profile_pictures(self, channels: List = None) -> List[Tuple[str, str]]: + """ Gets the profile picture urls for a list of channel ids (or for the self channel). + + Args: + channels: + + Returns: + profile_pictures: [(channel_id, thumbnail_url), ..] 
+ """ + + if channels is None: + profile_pictures_request = self._api.channels().list( + mine="true", + part="snippet", + fields='items(id,snippet(thumbnails(default)))' + ) + else: + profile_pictures_request = self._api.channels().list( + id=",".join(channels), + part="snippet", + fields='items(id,snippet(thumbnails(default)))' + ) + + profile_pictures_response = profile_pictures_request.execute() + + profile_pictures_result = [] + for profile_picture in profile_pictures_response["items"]: + profile_pictures_result.append( + (profile_picture["id"], profile_picture["snippet"]["thumbnails"]["default"]["url"])) + + return profile_pictures_result + + @staticmethod + def _yt_to_channel_dict(response: Dict) -> Union[Dict, None]: + """ + Transforms a YouTube API response into a channel Dict. + + Args: + response: + """ + + for channel in response['items']: + result = dict() + result['channel_id'] = channel['id'] + result['username'] = channel['snippet']['title'] + result['added_on'] = datetime.utcnow().isoformat() + result['last_commented'] = (datetime.utcnow() - timedelta(days=1)).isoformat() + return result + return None + + @staticmethod + def split_list(input_list: List, chunk_size: int) -> List: + """ + Split a list into `chunk_size` sub-lists. + + Args: + input_list: + chunk_size: + """ + + chunks = math.ceil(len(input_list) / chunk_size) + if chunks == 1: + output_list = [input_list] + else: + output_list = [] + end = 0 + for i in range(chunks - 1): + start = i * chunk_size + end = (i + 1) * chunk_size + output_list.append(input_list[start:end]) + output_list.append(input_list[end:]) + + return output_list + + def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> Dict: + """ Retrieves uploads using the specified playlist ID which were have been added + since the last check. 
+ + Args: + uploads_list_id (str): The ID of the uploads playlist + """ + + # Construct the request + playlist_items_request = self._api.playlistItems().list( + playlistId=uploads_list_id, + part="snippet", + fields='items(id,snippet(title,publishedAt,resourceId(videoId)))', + maxResults=50 + ) + + while playlist_items_request: + playlist_items_response = playlist_items_request.execute() + for playlist_item in playlist_items_response["items"]: + published_at = dateutil.parser.parse(playlist_item['snippet']['publishedAt']) + video = dict() + # Return the video only if it was published in the last `last_n_hours` hours + if published_at >= (datetime.utcnow() - timedelta(hours=last_n_hours)).replace( + tzinfo=timezone.utc): + video['id'] = playlist_item["snippet"]["resourceId"]["videoId"] + video['published_at'] = playlist_item["snippet"]["publishedAt"] + video['title'] = playlist_item["snippet"]["title"] + yield video + else: + return + + playlist_items_request = self._api.playlistItems().list_next( + playlist_items_request, playlist_items_response + ) + + def _comment_threads_insert(self, properties: Dict, **kwargs: Any) -> Dict: + """ Comment using the Youtube API. + Args: + properties: + **kwargs: + """ + + resource = self._build_resource(properties) + kwargs = self._remove_empty_kwargs(**kwargs) + response = self._api.commentThreads().insert(body=resource, **kwargs).execute() + return response + + @staticmethod + def _build_resource(properties: Dict) -> Dict: + """ Build a resource based on a list of properties given as key-value pairs. + Leave properties with empty values out of the inserted resource. """ + + resource = {} + for p in properties: + # Given a key like "snippet.title", split into "snippet" and "title", where + # "snippet" will be an object and "title" will be a property in that object. 
+ prop_array = p.split('.') + ref = resource + for pa in range(0, len(prop_array)): + is_array = False + key = prop_array[pa] + # For properties that have array values, convert a name like + # "snippet.tags[]" to snippet.tags, and set a flag to handle + # the value as an array. + if key[-2:] == '[]': + key = key[0:len(key) - 2:] + is_array = True + if pa == (len(prop_array) - 1): + # Leave properties without values out of inserted resource. + if properties[p]: + if is_array: + ref[key] = properties[p].split(',') + else: + ref[key] = properties[p] + elif key not in ref: + # For example, the property is "snippet.title", but the resource does + # not yet have a "snippet" object. Create the snippet object here. + # Setting "ref = ref[key]" means that in the next time through the + # "for pa in range ..." loop, we will be setting a property in the + # resource's "snippet" object. + ref[key] = {} + ref = ref[key] + else: + # For example, the property is "snippet.description", and the resource + # already has a "snippet" object. + ref = ref[key] + return resource + + @staticmethod + def _remove_empty_kwargs(**kwargs: Any) -> Dict: + """ Remove keyword arguments that are not set. 
""" + good_kwargs = {} + if kwargs is not None: + for key, value in kwargs.items(): + if value: + good_kwargs[key] = value + return good_kwargs diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py new file mode 100644 index 0000000..3f5adf4 --- /dev/null +++ b/youbot/youtube_utils/youtube_manager.py @@ -0,0 +1,160 @@ +from typing import * +from datetime import datetime, timedelta +import time +import arrow + +from youbot import ColorLogger, YoutubeMySqlDatastore +from .youtube_api import YoutubeApiV3 + +logger = ColorLogger('YoutubeManager') + + +class YoutubeManager(YoutubeApiV3): + __slots__ = ('db', 'sleep_time') + + def __init__(self, config: Dict, db_conf: Dict, sleep_time: int, tag: str): + self.db = YoutubeMySqlDatastore(config=db_conf['config']) + self.sleep_time = sleep_time + super().__init__(config, tag) + + def commenter(self): + # Set sleep_time = 0 for the first loop + sleep_time = 0 + # Start the main loop + while True: + time.sleep(sleep_time) + channel_ids = [channel['channel_id'] for channel in + self.db.get_channels(order_by='priority')] + comments = self.db.get_comments(n_recent=50) + video_links_commented = [comment['video_link'] for comment in comments] + latest_videos = self.get_uploads(channels=channel_ids, last_n_hours=250) + comments_added = [] + # Sort the videos by the priority of the channels (channel_ids are sorted by priority) + # and comment in the videos not already commented + try: + for video in sorted(latest_videos, + key=lambda video: channel_ids.index(video["channel_id"])): + video_url = f'https://youtube.com/watch?v={video["id"]}' + if video_url not in video_links_commented: + comment_text = self.get_next_comment(channel_id=video["channel_id"]) + self.comment(video_id=video["id"], comment_text=comment_text) + # Add the info of the new comment to be added in the DB + comments_added.append((video, video_url, comment_text, + datetime.utcnow().isoformat())) + except Exception as e: + 
logger.error(f"Exception in the main loop of the Commenter:\n{e}") + sleep_time = self.seconds_until_next_hour() + logger.error(f"Will sleep until next hour ({sleep_time} seconds)") + else: + sleep_time = self.sleep_time + # Save the new comments added in the DB + try: + for (video, video_url, comment_text, comment_time) in comments_added: + self.db.add_comment(video["channel_id"], video_link=video_url, + comment_text=comment_text, upload_time=video["published_at"], + comment_time=comment_time) + except Exception as e: + logger.error(f"MySQL error while storing comment:\n{e}") + raise e + # REMOVE ME + break + + def add_channel(self, channel_id: str = None, username: str = None) -> None: + if channel_id: + channel_info = self.get_channel_info_by_id(channel_id) + elif username: + channel_info = self.get_channel_info_by_username(username) + else: + raise YoutubeManagerError("You should either pass channel id or username " + "to add channel!") + if channel_info: + self.db.add_channel(channel_data=channel_info) + logger.info(f"Channel `{channel_info['username']}` successfully added!") + else: + raise YoutubeManagerError("Channel not found!") + + def remove_channel(self, channel_id: str = None, username: str = None) -> None: + if channel_id: + self.db.remove_channel_by_id(channel_id) + logger.info(f"Channel `{channel_id}` successfully removed!") + elif username: + self.db.remove_channel_by_username(username) + logger.info(f"Channel `{username}` successfully removed!") + else: + raise YoutubeManagerError("You should either pass channel id or username " + "to remove channel!") + + def refresh_photos(self): + channel_ids = [channel["channel_id"] for channel in self.db.get_channels()] + profile_pictures = self.get_profile_pictures(channel_ids) + for channel_id, picture_url in profile_pictures: + self.db.update_channel_photo(channel_id, picture_url) + + def list_channels(self) -> None: + channels = [(row["channel_id"], row["username"].title(), + 
arrow.get(row["added_on"]).humanize(), + arrow.get(row["last_commented"]).humanize()) + for row in self.db.get_channels()] + + headers = ['Channel Id', 'Channel Name', 'Added On', 'Last Commented'] + self.pretty_print(headers, channels) + + def list_comments(self, n_recent: int = 50, min_likes: int = -1, + min_replies: int = -1) -> None: + + comments = [(row["username"].title(), row["comment"], + arrow.get(row["comment_time"]).humanize(), + row["like_count"], row["reply_count"], row["comment_link"]) + for row in self.db.get_comments(n_recent, min_likes, min_replies)] + + headers = ['Channel', 'Comment', 'Time', 'Likes', 'Replies', 'Comment URL'] + self.pretty_print(headers, comments) + + def get_next_comment(self, channel_id: str) -> str: + return f"Test comment for {channel_id}" + + @staticmethod + def pretty_print(headers: List[str], data: List[Tuple]): + """Print the provided header and data in a visually pleasing manner + + Args: + headers: The headers to print + data: The data rows + """ + + print_str = "\n" + if len(data) == 0: + return + + separators = [] + for word in headers: + separators.append('-' * len(word)) + + output = [headers, separators] + data + + col_widths = [0] * len(headers) + for row in output: + for idx, column in enumerate(row): + if len(str(column)) > 100: + row[idx] = row[idx][:94] + " (...)" + if len(str(row[idx])) > col_widths[idx]: + col_widths[idx] = len(row[idx]) + + for row in output: + for idx, column in enumerate(row): + column = str(column) + print_str += "".join(column.ljust(col_widths[idx])) + " " + print_str += '\n' + logger.info(print_str) + + @staticmethod + def seconds_until_next_hour() -> int: + delta = timedelta(hours=1) + now = datetime.now() + next_hour = (now + delta).replace(microsecond=0, second=0, minute=2) + return (next_hour - now).seconds + + +class YoutubeManagerError(Exception): + def __init__(self, message): + super().__init__(message) From 35cd2db0fd1b428720dadec77979fe497f39d657 Mon Sep 17 00:00:00 2001 
From: drkostas Date: Fri, 27 May 2022 18:47:18 -0400 Subject: [PATCH 17/33] Fixed bugs with list commands, renamed main to run #4 --- TODO.md | 6 ++- youbot/{main.py => run.py} | 18 +++++---- youbot/youtube_utils/youtube_api.py | 3 +- youbot/yt_mysql.py | 62 ++++++++++++++++++++++++++++- 4 files changed, 78 insertions(+), 11 deletions(-) rename youbot/{main.py => run.py} (88%) diff --git a/TODO.md b/TODO.md index a42854d..5610b95 100644 --- a/TODO.md +++ b/TODO.md @@ -7,11 +7,13 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Integrate YoutubeMysql class into the YoutubeManager class - [X] Use the pypi packages I have created instead of the local ones - [ ] Create the workflow for the commenter +- [ ] Find a better way to change priorities (probably add a function to push everything) - [ ] Roll the comments for each channel - store comments in sql table? - [ ] Create table with errors - [ ] Create the workflow for the accumulator -- [ ] Add SQL script for creating the tables needed +- [ ] Add SQL scripts for creating the tables needed - [ ] Send me email on fatal error - [ ] Recreate the Livestreaming module - [ ] Improve the YouTube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) -- [ ] Use multiple account (different api keys) to check for new comments +- [ ] Use multiple accounts (different api keys) to check for new comments +- [ ] Regularly backup logs files from logs/ to dropbox (for when running on Heroku) diff --git a/youbot/main.py b/youbot/run.py similarity index 88% rename from youbot/main.py rename to youbot/run.py index 11536d2..63557ba 100644 --- a/youbot/main.py +++ b/youbot/run.py @@ -1,13 +1,13 @@ import traceback import argparse -from youbot import Configuration, ColorizedLogger, YoutubeManager +from youbot import Configuration, ColorLogger, YoutubeManager -logger = ColorizedLogger(logger_name='Main', color='yellow') +logger = ColorLogger(logger_name='Main', 
color='yellow') def get_args() -> argparse.Namespace: - """ Setup the argument parser. + """ Set up the argument parser. Returns: argparse.Namespace: @@ -63,6 +63,10 @@ def accumulator(youtube: YoutubeManager, args: argparse.Namespace) -> None: raise NotImplementedError() +def set_priority(youtube: YoutubeManager, args: argparse.Namespace) -> None: + raise NotImplementedError() + + def add_channel(youtube: YoutubeManager, args: argparse.Namespace) -> None: youtube.add_channel(channel_id=args.id, username=args.username) @@ -85,20 +89,20 @@ def refresh_photos(youtube: YoutubeManager, args: argparse.Namespace) -> None: def main(): - """ This is the main function of main.py + """ This is the main function of run.py Example: - python youbot/main.py -m run_mode_1 -c confs/conf.yml -l logs/output.log + python youbot/run.py -m run_mode_1 -c confs/conf.yml -l logs/output.log """ # Initializing args = get_args() - ColorizedLogger.setup_logger(log_path=args.log, debug=args.debug, clear_log=True) + ColorLogger.setup_logger(log_path=args.log, debug=args.debug, clear_log=True) # Load the configurations conf_obj = Configuration(config_src=args.config_file) you_conf = conf_obj.get_config('youtube')[0] db_conf = conf_obj.get_config('datastore')[0] - # Setup Youtube API + # Setup YouTube API youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, sleep_time=you_conf['sleep_time'], tag=conf_obj.tag) # Run in the specified run mode diff --git a/youbot/youtube_utils/youtube_api.py b/youbot/youtube_utils/youtube_api.py index 2b114fc..9dbf38d 100644 --- a/youbot/youtube_utils/youtube_api.py +++ b/youbot/youtube_utils/youtube_api.py @@ -64,7 +64,8 @@ def _build_api(client_id: str, client_secret: str, api_version: str, read_only_s flow = OAuth2WebServerFlow(client_id=client_id, client_secret=client_secret, scope=read_only_scope) - key_path = os.path.join('../../youbot (3)', 'keys', f'{tag}.json') + base_path = os.path.dirname(os.path.abspath(__file__)) + key_path = 
os.path.join(base_path, '../../', 'keys', f'{tag}.json') storage = Storage(key_path) credentials = storage.get() diff --git a/youbot/yt_mysql.py b/youbot/yt_mysql.py index 6b6e6bd..5132d5b 100644 --- a/youbot/yt_mysql.py +++ b/youbot/yt_mysql.py @@ -54,7 +54,7 @@ def get_channels(self) -> List[Dict]: result = self.select_from_table(table=self.CHANNEL_TABLE, order_by='priority') for row in result: - yield self._table_row_to_channel_dict(row) + yield self._table_row_to_channel_dict(row, ) def add_channel(self, channel_data: Dict) -> None: """ Insert the provided channel into the database""" @@ -158,6 +158,7 @@ def get_comments(self, n_recent: int = 50, min_likes: int = -1, min_likes: min_replies: """ + self.select_from_table(self.COMMENTS_TABLE) comment_cols = 'video_link, comment, comment_time, like_count, reply_count, comment_link' channel_cols = 'username, channel_photo' @@ -200,6 +201,65 @@ def update_comment(self, video_link: str, comment_id: str, set_data=set_data, where=f"video_link='{video_link}'") + def select_join(self, left_table: str, right_table: str, + join_key_left: str, join_key_right: str, + left_columns: str = '', right_columns: str = '', custom_columns: str = '', + join_type: str = 'INNER', + where: str = 'TRUE', order_by: str = 'NULL', asc_or_desc: str = 'ASC', + limit: int = 1000, group_by: str = '', having: str = '') -> List[Tuple]: + """ + Join two tables and select. 
+ + Args: + left_table: + right_table: + left_columns: + right_columns: + custom_columns: Custom columns for which no `l.` or `r.` will be added automatically + join_key_left: The column of join of the left table + join_key_right: The column of join of the right table + join_type: OneOf(INNER, LEFT, RIGHT) + where: Add a `l.` or `.r` before the specified columns + order_by: Add a `l.` or `.r` before the specified columns + asc_or_desc: + limit: + group_by: Add a `l.` or `.r` before the specified columns + having: Add a `l.` or `.r` before the specified columns + """ + + # Construct Group By + if group_by: + if having: + having = f'HAVING {having}' + group_by = f'GROUP BY {group_by} {having} ' + + # Construct Columns + if left_columns: + left_columns = 'l.' + ', l.'.join(map(str.strip, left_columns.split(','))) + if right_columns or custom_columns: + left_columns += ', ' + if right_columns: + right_columns = 'r.' + ', r.'.join(map(str.strip, right_columns.split(','))) + if custom_columns: + right_columns += ', ' + columns = f'{left_columns} {right_columns} {custom_columns}' + + # Build the Query + query = f"SELECT {columns} " \ + f"FROM {left_table} l " \ + f"{join_type} JOIN {right_table} r " \ + f"ON l.{join_key_left}=r.{join_key_right} " \ + f"WHERE {where} " \ + f"{group_by}" \ + f"ORDER BY {order_by} {asc_or_desc} " \ + f"LIMIT {limit}" + + logger.debug("Executing: %s" % query) + self._cursor.execute(query) + results = self._cursor.fetchall() + + return results + @staticmethod def _table_row_to_channel_dict(row: Tuple) -> Dict: """Transform a table row into a channel representation From 00455c7a098606d5efb35449d98bd14a8f1c4586 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sat, 28 May 2022 14:25:27 -0400 Subject: [PATCH 18/33] Added set_priority functionality #4 --- TODO.md | 2 +- youbot/run.py | 15 +++++-- youbot/youtube_utils/youtube_manager.py | 34 +++++++++++---- youbot/yt_mysql.py | 57 ++++++++++++++++++++++--- 4 files changed, 89 insertions(+), 19 
deletions(-) diff --git a/TODO.md b/TODO.md index 5610b95..0b47413 100644 --- a/TODO.md +++ b/TODO.md @@ -6,8 +6,8 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Create child MySQL class - [X] Integrate YoutubeMysql class into the YoutubeManager class - [X] Use the pypi packages I have created instead of the local ones +- [X] Find a better way to change priorities (probably add a function to push everything) - [ ] Create the workflow for the commenter -- [ ] Find a better way to change priorities (probably add a function to push everything) - [ ] Roll the comments for each channel - store comments in sql table? - [ ] Create table with errors - [ ] Create the workflow for the accumulator diff --git a/youbot/run.py b/youbot/run.py index 63557ba..11e6287 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -29,7 +29,7 @@ def get_args() -> argparse.Namespace: optional_args = parser.add_argument_group('Optional Arguments') commands = ['commenter', 'accumulator', 'add_channel', 'remove_channel', 'list_channels', 'list_comments', - 'refresh_photos'] + 'refresh_photos', 'set_priority'] optional_args.add_argument('-m', '--run-mode', choices=commands, default=commands[0], help='Description of the run modes') @@ -42,6 +42,8 @@ def get_args() -> argparse.Namespace: help="Number of minimum liked for `list_comments`") optional_args.add_argument('--min_replies', default=-1, help="Number of minimum replies for `list_comments`") + optional_args.add_argument('--priority', + help="Priority number for specified channel for `set_priority`") optional_args.add_argument('-d', '--debug', action='store_true', help='Enables the debug log messages') optional_args.add_argument("-h", "--help", action="help", help="Show this help message and exit") @@ -49,9 +51,13 @@ def get_args() -> argparse.Namespace: args = parser.parse_args() # Custom Condition Checking if (args.id is None and args.username is None) and \ - args.run_mode in ['add_channel', 'remove_channel']: + 
args.run_mode in ['add_channel', 'remove_channel', 'set_priority']: parser.error('You need to pass either --id or --username when selecting ' - 'the `add_channel` and `remove_channel` actions') + 'the `add_channel`, `remove_channel`, or `set_priority` actions') + if (args.priority is None) and \ + args.run_mode in ['set_priority']: + parser.error('You need to pass --priority when selecting ' + 'the `set_priority` action') return args @@ -64,7 +70,8 @@ def accumulator(youtube: YoutubeManager, args: argparse.Namespace) -> None: def set_priority(youtube: YoutubeManager, args: argparse.Namespace) -> None: - raise NotImplementedError() + youtube.set_priority(channel_id=args.id, username=args.username, + priority=args.priority) def add_channel(youtube: YoutubeManager, args: argparse.Namespace) -> None: diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 3f5adf4..b5683a8 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -24,20 +24,21 @@ def commenter(self): while True: time.sleep(sleep_time) channel_ids = [channel['channel_id'] for channel in - self.db.get_channels(order_by='priority')] + self.db.get_channels()] comments = self.db.get_comments(n_recent=50) video_links_commented = [comment['video_link'] for comment in comments] - latest_videos = self.get_uploads(channels=channel_ids, last_n_hours=250) + latest_videos = self.get_uploads(channels=channel_ids, + last_n_hours=250) # TODO: make this configurable comments_added = [] # Sort the videos by the priority of the channels (channel_ids are sorted by priority) # and comment in the videos not already commented try: for video in sorted(latest_videos, - key=lambda video: channel_ids.index(video["channel_id"])): + key=lambda _video: channel_ids.index(_video["channel_id"])): video_url = f'https://youtube.com/watch?v={video["id"]}' if video_url not in video_links_commented: comment_text = 
self.get_next_comment(channel_id=video["channel_id"]) - self.comment(video_id=video["id"], comment_text=comment_text) + # self.comment(video_id=video["id"], comment_text=comment_text) # Add the info of the new comment to be added in the DB comments_added.append((video, video_url, comment_text, datetime.utcnow().isoformat())) @@ -51,8 +52,7 @@ def commenter(self): try: for (video, video_url, comment_text, comment_time) in comments_added: self.db.add_comment(video["channel_id"], video_link=video_url, - comment_text=comment_text, upload_time=video["published_at"], - comment_time=comment_time) + comment_text=comment_text, upload_time=video["published_at"]) except Exception as e: logger.error(f"MySQL error while storing comment:\n{e}") raise e @@ -90,13 +90,29 @@ def refresh_photos(self): for channel_id, picture_url in profile_pictures: self.db.update_channel_photo(channel_id, picture_url) + def set_priority(self, channel_id: str = None, username: str = None, priority: str = None) -> None: + if channel_id: + channel_info = self.get_channel_info_by_id(channel_id) + elif username: + channel_info = self.get_channel_info_by_username(username) + else: + raise YoutubeManagerError("You should either pass channel id or username " + "to add channel!") + if channel_info: + self.db.set_priority(channel_data=channel_info, priority=priority) + logger.info(f"Channel `{channel_info['username']}` priority changed to {priority}!") + else: + raise YoutubeManagerError("Channel not found!") + def list_channels(self) -> None: - channels = [(row["channel_id"], row["username"].title(), + channels = [(row["priority"], row["username"].title(), row["channel_id"], arrow.get(row["added_on"]).humanize(), - arrow.get(row["last_commented"]).humanize()) + arrow.get(row["last_commented"]).humanize(), + row["channel_photo"] + ) for row in self.db.get_channels()] - headers = ['Channel Id', 'Channel Name', 'Added On', 'Last Commented'] + headers = ['Priority', 'Channel Name', 'Channel ID', 'Added On', 
'Last Commented', 'Channel Photo'] self.pretty_print(headers, channels) def list_comments(self, n_recent: int = 50, min_likes: int = -1, diff --git a/youbot/yt_mysql.py b/youbot/yt_mysql.py index 5132d5b..518df17 100644 --- a/youbot/yt_mysql.py +++ b/youbot/yt_mysql.py @@ -60,8 +60,50 @@ def add_channel(self, channel_data: Dict) -> None: """ Insert the provided channel into the database""" try: - self.insert_into_table(table=self.CHANNEL_TABLE, data=channel_data, if_not_exists=True) - except HighMySQL.mysql.connector.errors.IntegrityError as e: + # TODO: Implement if_not_exists=True in HighMySQL + self.insert_into_table(table=self.CHANNEL_TABLE, data=channel_data) + except Exception as e: + # TODO: except HighMySQL.mysql.connector.errors.IntegrityError as e: + # Expose mysql in HighMySQL + logger.error(f"MySQL error: {e}") + + def set_priority(self, channel_data: Dict, priority: str) -> None: + """ Insert the provided channel into the database""" + priority = int(priority) + req_priority = priority + req_channel_id = channel_data['channel_id'] + channels = list(self.get_channels()) + try: + # Give all channels a temp priority + for channel in channels: + channel_id = channel['channel_id'] + # Execute the update command + self.update_table(table=self.CHANNEL_TABLE, + set_data={'priority': -int(channel['priority'])}, + where=f"channel_id='{channel_id}'") + # Update the other channels + ch_cnt = 1 + for channel in channels: + channel_id = channel['channel_id'] + if channel_id == req_channel_id: + continue + if channel['priority'] < req_priority: + set_data = {'priority': ch_cnt} + ch_cnt += 1 + else: + set_data = {'priority': priority + 1} + priority += 1 + # Execute the update command + self.update_table(table=self.CHANNEL_TABLE, + set_data=set_data, + where=f"channel_id='{channel_id}'") + # Update the requested channel + self.update_table(table=self.CHANNEL_TABLE, + set_data={'priority': req_priority}, + where=f"channel_id='{req_channel_id}'") + except Exception as 
e: + # TODO: except HighMySQL.mysql.connector.errors.IntegrityError as e: + # Expose mysql in HighMySQL logger.error(f"MySQL error: {e}") def get_channel_by_id(self, ch_id: str) -> Tuple: @@ -125,20 +167,22 @@ def update_channel_photo(self, channel_id: str, photo_url: str) -> None: set_data=set_data, where=f"channel_id='{channel_id}'") - def add_comment(self, ch_id: str, video_link: str, comment_text: str) -> None: + def add_comment(self, ch_id: str, video_link: str, comment_text: str, upload_time: str) -> None: """ TODO: check the case where a comment contains single quotes Add comment data and update the `last_commented` channel column. Args: ch_id: video_link: comment_text: + upload_time: """ datetime_now = datetime.utcnow().isoformat() comments_data = {'channel_id': ch_id, 'video_link': video_link, 'comment': comment_text, - 'comment_time': datetime_now} + 'comment_time': datetime_now, + 'upload_time': upload_time} update_data = {'last_commented': datetime_now} where_statement = f"channel_id='{ch_id}'" @@ -146,7 +190,9 @@ def add_comment(self, ch_id: str, video_link: str, comment_text: str) -> None: self.insert_into_table(self.COMMENTS_TABLE, data=comments_data) # Update Channel's last_commented timestamp self.update_table(table=self.CHANNEL_TABLE, set_data=update_data, where=where_statement) - except HighMySQL.mysql.connector.errors.IntegrityError as e: + except Exception as e: + # TODO: except HighMySQL.mysql.connector.errors.IntegrityError as e: + # Expose mysql in HighMySQL logger.error(f"MySQL Error: {e}") def get_comments(self, n_recent: int = 50, min_likes: int = -1, @@ -201,6 +247,7 @@ def update_comment(self, video_link: str, comment_id: str, set_data=set_data, where=f"video_link='{video_link}'") + # TODO: Add this to HighMySQL def select_join(self, left_table: str, right_table: str, join_key_left: str, join_key_right: str, left_columns: str = '', right_columns: str = '', custom_columns: str = '', From 91e09307ebab4bbe76269072de475d844562bc74 Mon Sep 
17 00:00:00 2001 From: drkostas Date: Sat, 28 May 2022 15:07:42 -0400 Subject: [PATCH 19/33] Configurable max poster hours, started simulated youtube video posts #4 --- comments/sample_comments.txt | 7 +++---- confs/commenter.yml | 3 ++- youbot/run.py | 4 +++- youbot/youtube_utils/youtube_api.py | 15 ++++++++------- youbot/youtube_utils/youtube_manager.py | 8 +++++--- 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/comments/sample_comments.txt b/comments/sample_comments.txt index 9b60dcd..cc48c98 100644 --- a/comments/sample_comments.txt +++ b/comments/sample_comments.txt @@ -1,4 +1,3 @@ -Hey! I am a bot. -Hello there! -Nice video! -Woo nice! \ No newline at end of file +Cool video +Nice editing +Ignore this comment \ No newline at end of file diff --git a/confs/commenter.yml b/confs/commenter.yml index 256bcb1..2e37361 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -22,8 +22,9 @@ youtube: client_secret: !ENV ${CLIENT_SECRET} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl - type: normal sleep_time: 60 + max_posted_hours: 250 # max num. 
of hours to check back for videos + type: simulated # normal, simulated comment: - config: comments_list: diff --git a/youbot/run.py b/youbot/run.py index 11e6287..b870a21 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -111,7 +111,9 @@ def main(): db_conf = conf_obj.get_config('datastore')[0] # Setup YouTube API youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, - sleep_time=you_conf['sleep_time'], tag=conf_obj.tag) + sleep_time=you_conf['sleep_time'], + max_posted_hours=you_conf['max_posted_hours'], + tag=conf_obj.tag) # Run in the specified run mode func = globals()[args.run_mode] func(youtube, args) diff --git a/youbot/youtube_utils/youtube_api.py b/youbot/youtube_utils/youtube_api.py index 9dbf38d..bf564ad 100644 --- a/youbot/youtube_utils/youtube_api.py +++ b/youbot/youtube_utils/youtube_api.py @@ -100,6 +100,7 @@ def comment(self, video_id: str, comment_text: str) -> None: 'snippet.topLevelComment.snippet.textOriginal': comment_text} # self._comment_threads_insert(properties=properties, # part='snippet') + # TODO: uncomment this when commenter is done except Exception as exc: logger.error(f"An error occurred:\n{exc}") @@ -139,12 +140,12 @@ def get_channel_info_by_id(self, channel_id: str) -> Union[Dict, None]: return self._yt_to_channel_dict(channels_response) - def get_uploads(self, channels: List, last_n_hours: int = 2) -> Dict: + def get_uploads(self, channels: List, max_posted_hours: int = 2) -> Dict: """ Retrieves new uploads for the specified channels. 
Args: channels(list): A list with channel IDs - last_n_hours: + max_posted_hours: """ # Separate the channels list in 50-sized channel lists @@ -161,7 +162,7 @@ def get_uploads(self, channels: List, last_n_hours: int = 2) -> Dict: # For each playlist ID, get 50 videos for channel in channels_to_check: uploads_list_id = channel["contentDetails"]["relatedPlaylists"]["uploads"] - for upload in self._get_uploads_playlist(uploads_list_id, last_n_hours): + for upload in self._get_uploads_playlist(uploads_list_id, max_posted_hours): upload['channel_title'] = channel['snippet']['title'] upload['channel_id'] = channel['id'] yield upload @@ -274,8 +275,8 @@ def split_list(input_list: List, chunk_size: int) -> List: return output_list - def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> Dict: - """ Retrieves uploads using the specified playlist ID which were have been added + def _get_uploads_playlist(self, uploads_list_id: str, max_posted_hours: int = 2) -> Dict: + """ Retrieves uploads using the specified playlist ID which were had been added since the last check. Args: @@ -296,7 +297,7 @@ def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> published_at = dateutil.parser.parse(playlist_item['snippet']['publishedAt']) video = dict() # Return the video only if it was published in the last `last_n_hours` hours - if published_at >= (datetime.utcnow() - timedelta(hours=last_n_hours)).replace( + if published_at >= (datetime.utcnow() - timedelta(hours=max_posted_hours)).replace( tzinfo=timezone.utc): video['id'] = playlist_item["snippet"]["resourceId"]["videoId"] video['published_at'] = playlist_item["snippet"]["publishedAt"] @@ -310,7 +311,7 @@ def _get_uploads_playlist(self, uploads_list_id: str, last_n_hours: int = 2) -> ) def _comment_threads_insert(self, properties: Dict, **kwargs: Any) -> Dict: - """ Comment using the Youtube API. + """ Comment using the YouTube API. 
Args: properties: **kwargs: diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index b5683a8..a23ee63 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -12,9 +12,10 @@ class YoutubeManager(YoutubeApiV3): __slots__ = ('db', 'sleep_time') - def __init__(self, config: Dict, db_conf: Dict, sleep_time: int, tag: str): + def __init__(self, config: Dict, db_conf: Dict, sleep_time: int, max_posted_hours: int, tag: str): self.db = YoutubeMySqlDatastore(config=db_conf['config']) self.sleep_time = sleep_time + self.max_posted_hours = max_posted_hours super().__init__(config, tag) def commenter(self): @@ -28,7 +29,7 @@ def commenter(self): comments = self.db.get_comments(n_recent=50) video_links_commented = [comment['video_link'] for comment in comments] latest_videos = self.get_uploads(channels=channel_ids, - last_n_hours=250) # TODO: make this configurable + max_posted_hours=self.max_posted_hours) comments_added = [] # Sort the videos by the priority of the channels (channel_ids are sorted by priority) # and comment in the videos not already commented @@ -112,7 +113,8 @@ def list_channels(self) -> None: ) for row in self.db.get_channels()] - headers = ['Priority', 'Channel Name', 'Channel ID', 'Added On', 'Last Commented', 'Channel Photo'] + headers = ['Priority', 'Channel Name', 'Channel ID', 'Added On', 'Last Commented', + 'Channel Photo'] self.pretty_print(headers, channels) def list_comments(self, n_recent: int = 50, min_likes: int = -1, From 6f519051b46d09a994d53fa3ba452a599d3174d3 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sat, 28 May 2022 16:02:36 -0400 Subject: [PATCH 20/33] Created simulated video posts #4 --- youbot/run.py | 2 +- youbot/youtube_utils/youtube_manager.py | 39 +++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/youbot/run.py b/youbot/run.py index b870a21..be7b50d 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ 
-113,7 +113,7 @@ def main(): youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, sleep_time=you_conf['sleep_time'], max_posted_hours=you_conf['max_posted_hours'], - tag=conf_obj.tag) + api_type=you_conf['type'], tag=conf_obj.tag) # Run in the specified run mode func = globals()[args.run_mode] func(youtube, args) diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index a23ee63..0e31db6 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -2,6 +2,8 @@ from datetime import datetime, timedelta import time import arrow +import random +import string from youbot import ColorLogger, YoutubeMySqlDatastore from .youtube_api import YoutubeApiV3 @@ -12,10 +14,14 @@ class YoutubeManager(YoutubeApiV3): __slots__ = ('db', 'sleep_time') - def __init__(self, config: Dict, db_conf: Dict, sleep_time: int, max_posted_hours: int, tag: str): + def __init__(self, config: Dict, db_conf: Dict, sleep_time: int, max_posted_hours: int, + api_type: str, tag: str): self.db = YoutubeMySqlDatastore(config=db_conf['config']) self.sleep_time = sleep_time self.max_posted_hours = max_posted_hours + self.api_type = api_type + if self.api_type == 'simulated': + self.get_uploads = self.simulate_uploads super().__init__(config, tag) def commenter(self): @@ -57,7 +63,7 @@ def commenter(self): except Exception as e: logger.error(f"MySQL error while storing comment:\n{e}") raise e - # REMOVE ME + # TODO: REMOVE ME when commenter is done break def add_channel(self, channel_id: str = None, username: str = None) -> None: @@ -131,6 +137,35 @@ def list_comments(self, n_recent: int = 50, min_likes: int = -1, def get_next_comment(self, channel_id: str) -> str: return f"Test comment for {channel_id}" + def simulate_uploads(self, channels: List, max_posted_hours: int = 2) -> Dict: + """ Generates new uploads for the specified channels. 
+ + Args: + channels(list): A list with channel IDs + max_posted_hours: + """ + num_videos = random.randint(1, 4) + channels = [(channel['username'], channel['channel_id']) for channel in + self.db.get_channels()] + for video_ind in range(num_videos): + vid_id = ''.join( + random.choices(string.ascii_uppercase + string.ascii_lowercase + string.digits, k=11)) + title_length = random.randint(10, 40) + vid_title = ''.join(random.choices(string.ascii_lowercase + ' ', k=title_length)).title() + ch_name, ch_id = random.choice(channels) + secs = random.randint(1, 59) + mins = random.randint(1, 59) + hours = random.randint(1, 59) + published_at = (datetime.utcnow() - timedelta(seconds=secs, + minutes=mins, + hours=hours)).isoformat() + upload = {'id': vid_id, + 'published_at': published_at, + 'title': vid_title, + 'channel_title': ch_name, + 'channel_id': ch_id} + yield upload + @staticmethod def pretty_print(headers: List[str], data: List[Tuple]): """Print the provided header and data in a visually pleasing manner From 3569fea1b9edae442d76efbd407b2ecb877530f7 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sat, 28 May 2022 18:52:28 -0400 Subject: [PATCH 21/33] Commenter workflow done. Rolling comments implemented. 
#4 --- TODO.md | 5 +- comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt | 2 + comments/{sample_comments.txt => default.txt} | 0 confs/commenter.yml | 9 +-- confs/template_conf.yml | 18 ----- confs/template_conf_with_env_variables.yml | 18 ----- youbot/run.py | 3 +- youbot/youtube_utils/youtube_manager.py | 79 ++++++++++++++++--- youbot/yt_mysql.py | 6 +- 9 files changed, 81 insertions(+), 59 deletions(-) create mode 100644 comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt rename comments/{sample_comments.txt => default.txt} (100%) delete mode 100644 confs/template_conf.yml delete mode 100644 confs/template_conf_with_env_variables.yml diff --git a/TODO.md b/TODO.md index 0b47413..d73923f 100644 --- a/TODO.md +++ b/TODO.md @@ -7,8 +7,9 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Integrate YoutubeMysql class into the YoutubeManager class - [X] Use the pypi packages I have created instead of the local ones - [X] Find a better way to change priorities (probably add a function to push everything) -- [ ] Create the workflow for the commenter -- [ ] Roll the comments for each channel - store comments in sql table? +- [X] Create the workflow for the commenter +- [X] Roll the comments for each channel - store comments in sql table? 
+- [ ] Store comments in dropbox - [ ] Create table with errors - [ ] Create the workflow for the accumulator - [ ] Add SQL scripts for creating the tables needed diff --git a/comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt b/comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt new file mode 100644 index 0000000..60cf281 --- /dev/null +++ b/comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt @@ -0,0 +1,2 @@ +An extra comment just for this channel +Another extra comment \ No newline at end of file diff --git a/comments/sample_comments.txt b/comments/default.txt similarity index 100% rename from comments/sample_comments.txt rename to comments/default.txt diff --git a/confs/commenter.yml b/confs/commenter.yml index 2e37361..35cb6a0 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -23,10 +23,9 @@ youtube: api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl sleep_time: 60 - max_posted_hours: 250 # max num. of hours to check back for videos + max_posted_hours: 250 # max num. of hours to check back for posted videos type: simulated # normal, simulated -comment: +comments: - config: - comments_list: - path: ../comments/sample_comments.txt - type: local # local, dropbox, or mysql \ No newline at end of file + folder_name: comments + type: local # local, dropbox, or mysql \ No newline at end of file diff --git a/confs/template_conf.yml b/confs/template_conf.yml deleted file mode 100644 index fbb0910..0000000 --- a/confs/template_conf.yml +++ /dev/null @@ -1,18 +0,0 @@ -tag: template -cloudstore: - - config: - api_key: yourapikey - type: dropbox -datastore: - - config: - hostname: hostname - username: username - password: pass - db_name: mydb - port: 3306 - type: mysql -emailer: - - config: - email_address: foo@gmail.com - api_key: 123 - type: gmail \ No newline at end of file diff --git a/confs/template_conf_with_env_variables.yml b/confs/template_conf_with_env_variables.yml deleted file mode 100644 index df2c158..0000000 --- a/confs/template_conf_with_env_variables.yml 
+++ /dev/null @@ -1,18 +0,0 @@ -tag: template -cloudstore: - - config: - api_key: !ENV ${DROPBOX_API_KEY} - type: dropbox -datastore: - - config: - hostname: !ENV ${MYSQL_HOST} - username: !ENV ${MYSQL_USERNAME} - password: !ENV ${MYSQL_PASSWORD} - db_name: !ENV ${MYSQL_DB_NAME} - port: 3306 - type: mysql -emailer: - - config: - email_address: !ENV ${EMAIL_ADDRESS} - api_key: !ENV ${GMAIL_API_KEY} - type: gmail \ No newline at end of file diff --git a/youbot/run.py b/youbot/run.py index be7b50d..0581014 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -109,8 +109,9 @@ def main(): conf_obj = Configuration(config_src=args.config_file) you_conf = conf_obj.get_config('youtube')[0] db_conf = conf_obj.get_config('datastore')[0] + comments_conf = conf_obj.get_config('comments')[0] # Setup YouTube API - youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, + youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, comments_conf=comments_conf, sleep_time=you_conf['sleep_time'], max_posted_hours=you_conf['max_posted_hours'], api_type=you_conf['type'], tag=conf_obj.tag) diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 0e31db6..4ae5d14 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -1,9 +1,12 @@ from typing import * from datetime import datetime, timedelta +from dateutil import parser import time import arrow import random import string +import os +from glob import glob from youbot import ColorLogger, YoutubeMySqlDatastore from .youtube_api import YoutubeApiV3 @@ -12,28 +15,34 @@ class YoutubeManager(YoutubeApiV3): - __slots__ = ('db', 'sleep_time') + __slots__ = ('db', 'comments_conf', 'default_sleep_time', 'max_posted_hours', 'api_type', + 'template_comments') - def __init__(self, config: Dict, db_conf: Dict, sleep_time: int, max_posted_hours: int, + def __init__(self, config: Dict, db_conf: Dict, comments_conf: Dict, + sleep_time: int, 
max_posted_hours: int, api_type: str, tag: str): self.db = YoutubeMySqlDatastore(config=db_conf['config']) - self.sleep_time = sleep_time + self.comments_conf = comments_conf['config'] + self.default_sleep_time = sleep_time self.max_posted_hours = max_posted_hours self.api_type = api_type + self.template_comments = {} if self.api_type == 'simulated': self.get_uploads = self.simulate_uploads super().__init__(config, tag) def commenter(self): - # Set sleep_time = 0 for the first loop + # Initialize sleep_time = 0 # Start the main loop while True: time.sleep(sleep_time) + self.load_template_comments() channel_ids = [channel['channel_id'] for channel in self.db.get_channels()] - comments = self.db.get_comments(n_recent=50) - video_links_commented = [comment['video_link'] for comment in comments] + commented_comments, video_links_commented = self.get_comments(channel_ids=channel_ids, + n_recent=500) + latest_videos = self.get_uploads(channels=channel_ids, max_posted_hours=self.max_posted_hours) comments_added = [] @@ -44,9 +53,11 @@ def commenter(self): key=lambda _video: channel_ids.index(_video["channel_id"])): video_url = f'https://youtube.com/watch?v={video["id"]}' if video_url not in video_links_commented: - comment_text = self.get_next_comment(channel_id=video["channel_id"]) + comment_text = \ + self.get_next_template_comment(channel_id=video["channel_id"], + commented_comments=commented_comments) # self.comment(video_id=video["id"], comment_text=comment_text) - # Add the info of the new comment to be added in the DB + # Add the info of the new comment to be added in the DB after this loop comments_added.append((video, video_url, comment_text, datetime.utcnow().isoformat())) except Exception as e: @@ -54,7 +65,7 @@ def commenter(self): sleep_time = self.seconds_until_next_hour() logger.error(f"Will sleep until next hour ({sleep_time} seconds)") else: - sleep_time = self.sleep_time + sleep_time = self.default_sleep_time # Save the new comments added in the DB try: 
for (video, video_url, comment_text, comment_time) in comments_added: @@ -63,8 +74,16 @@ def commenter(self): except Exception as e: logger.error(f"MySQL error while storing comment:\n{e}") raise e - # TODO: REMOVE ME when commenter is done - break + + def get_comments(self, n_recent, channel_ids): + commented_comments = {} + video_links_commented = [] + for channel_id in channel_ids: + commented_comments[channel_id] = list(self.db.get_comments(channel_id=channel_id, + n_recent=n_recent)) + video_links_commented += [comment['video_link'] for comment in + commented_comments[channel_id]] + return commented_comments, video_links_commented def add_channel(self, channel_id: str = None, username: str = None) -> None: if channel_id: @@ -134,8 +153,41 @@ def list_comments(self, n_recent: int = 50, min_likes: int = -1, headers = ['Channel', 'Comment', 'Time', 'Likes', 'Replies', 'Comment URL'] self.pretty_print(headers, comments) - def get_next_comment(self, channel_id: str) -> str: - return f"Test comment for {channel_id}" + def load_template_comments(self): + if self.comments_conf['type'] == 'local': + base_path = os.path.dirname(os.path.abspath(__file__)) + comments_path = os.path.join(base_path, '../..', self.comments_conf['folder_name'], + "*.txt") + for file in glob(comments_path): + file_name = file.split('/')[-1][:-4] + with open(file) as f: + self.template_comments[file_name] = [_f.rstrip() for _f in f.readlines()] + + def get_next_template_comment(self, channel_id: str, commented_comments: Dict) -> str: + """ TODO: Probably much more efficient with numpy or sql. 
""" + commented_comments = commented_comments[channel_id] + available_comments = self.template_comments['default'].copy() + # Build the comments pool + if channel_id in self.template_comments: + available_comments += self.template_comments[channel_id] + # Extract unique comments commented + unique_com_coms = set(data['comment'] for data in commented_comments) + new_comments = set(available_comments) - unique_com_coms + if new_comments: # If we have new template comments + comment = next(iter(new_comments)) + else: # Otherwise, pick the oldest one (with duplicate handling + comment_dates = {} + for unique_comment in unique_com_coms: + comment_dates[unique_comment] = parser.parse('1994-04-30T08:00:00.000000') + for com_data in commented_comments: + if com_data['comment'] == unique_comment: + comment_time = parser.parse(com_data['comment_time']) + if comment_time > comment_dates[unique_comment]: + comment_dates[unique_comment] = parser.parse(com_data['comment_time']) + comment = [k for k, v in sorted(comment_dates.items(), + key=lambda p: p[1], reverse=False)][0] + + return comment def simulate_uploads(self, channels: List, max_posted_hours: int = 2) -> Dict: """ Generates new uploads for the specified channels. 
@@ -153,6 +205,7 @@ def simulate_uploads(self, channels: List, max_posted_hours: int = 2) -> Dict: title_length = random.randint(10, 40) vid_title = ''.join(random.choices(string.ascii_lowercase + ' ', k=title_length)).title() ch_name, ch_id = random.choice(channels) + channels.remove((ch_name, ch_id)) secs = random.randint(1, 59) mins = random.randint(1, 59) hours = random.randint(1, 59) diff --git a/youbot/yt_mysql.py b/youbot/yt_mysql.py index 518df17..1454cdd 100644 --- a/youbot/yt_mysql.py +++ b/youbot/yt_mysql.py @@ -196,7 +196,7 @@ def add_comment(self, ch_id: str, video_link: str, comment_text: str, upload_tim logger.error(f"MySQL Error: {e}") def get_comments(self, n_recent: int = 50, min_likes: int = -1, - min_replies: int = -1) -> List[Dict]: + min_replies: int = -1, channel_id: str = None) -> List[Dict]: """ Get the latest n_recent comments from the comments table. Args: @@ -207,8 +207,10 @@ def get_comments(self, n_recent: int = 50, min_likes: int = -1, self.select_from_table(self.COMMENTS_TABLE) comment_cols = 'video_link, comment, comment_time, like_count, reply_count, comment_link' - channel_cols = 'username, channel_photo' + channel_cols = 'username, channel_id, channel_photo' where = f'l.like_count>={min_likes} AND l.reply_count>={min_replies} ' + if channel_id: + where += f"AND l.channel_id='{channel_id}'" for comment in self.select_join(left_table=self.COMMENTS_TABLE, right_table=self.CHANNEL_TABLE, left_columns=comment_cols, From 59069e1c9bd42aadbe71cfd09ac5ae37b87fad90 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sat, 28 May 2022 20:12:27 -0400 Subject: [PATCH 22/33] Implemented saving comments in dropbox #4 --- Procfile | 2 +- TODO.md | 2 +- comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt | 2 -- comments/default.txt | 3 --- confs/commenter.yml | 5 +++-- youbot/run.py | 8 +++++--- youbot/youtube_utils/youtube_manager.py | 21 ++++++++++++++++----- 7 files changed, 26 insertions(+), 17 deletions(-) delete mode 100644 comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt 
delete mode 100644 comments/default.txt diff --git a/Procfile b/Procfile index 467ee6e..13a2f3b 100644 --- a/Procfile +++ b/Procfile @@ -1,2 +1,2 @@ run_tests: make run_tests -main: python youbot/main.py -m run_mode_1 -c ../confs/template_conf.yml -l logs/output.log \ No newline at end of file +main: python youbot/run.py -m commenter -c confs/commenter.yml -l logs/commenter.log \ No newline at end of file diff --git a/TODO.md b/TODO.md index d73923f..5c0a6c7 100644 --- a/TODO.md +++ b/TODO.md @@ -10,7 +10,7 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Create the workflow for the commenter - [X] Roll the comments for each channel - store comments in sql table? - [ ] Store comments in dropbox -- [ ] Create table with errors +- [ ] Store errors in sql or dropbox - [ ] Create the workflow for the accumulator - [ ] Add SQL scripts for creating the tables needed - [ ] Send me email on fatal error diff --git a/comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt b/comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt deleted file mode 100644 index 60cf281..0000000 --- a/comments/UC-ImLFXGIe2FC4Wo5hOodnw.txt +++ /dev/null @@ -1,2 +0,0 @@ -An extra comment just for this channel -Another extra comment \ No newline at end of file diff --git a/comments/default.txt b/comments/default.txt deleted file mode 100644 index cc48c98..0000000 --- a/comments/default.txt +++ /dev/null @@ -1,3 +0,0 @@ -Cool video -Nice editing -Ignore this comment \ No newline at end of file diff --git a/confs/commenter.yml b/confs/commenter.yml index 35cb6a0..d1fc7f2 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -27,5 +27,6 @@ youtube: type: simulated # normal, simulated comments: - config: - folder_name: comments - type: local # local, dropbox, or mysql \ No newline at end of file + local_folder_name: comments + dropbox_folder_name: /yt-commenter/comments + type: dropbox # local, dropbox, or mysql \ No newline at end of file diff --git a/youbot/run.py b/youbot/run.py index 0581014..5330665 
100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -99,19 +99,21 @@ def main(): """ This is the main function of run.py Example: - python youbot/run.py -m run_mode_1 -c confs/conf.yml -l logs/output.log + python youbot/run.py -m commenter -c confs/commenter.yml -l logs/commenter.log """ # Initializing args = get_args() - ColorLogger.setup_logger(log_path=args.log, debug=args.debug, clear_log=True) + ColorLogger.setup_logger(log_path=args.log, debug=args.debug, clear_log=False) # Load the configurations conf_obj = Configuration(config_src=args.config_file) you_conf = conf_obj.get_config('youtube')[0] db_conf = conf_obj.get_config('datastore')[0] + cloud_conf = conf_obj.get_config('cloudstore')[0] comments_conf = conf_obj.get_config('comments')[0] # Setup YouTube API - youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, comments_conf=comments_conf, + youtube = YoutubeManager(config=you_conf['config'], + db_conf=db_conf, cloud_conf=cloud_conf, comments_conf=comments_conf, sleep_time=you_conf['sleep_time'], max_posted_hours=you_conf['max_posted_hours'], api_type=you_conf['type'], tag=conf_obj.tag) diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 4ae5d14..4a4a24a 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -8,20 +8,21 @@ import os from glob import glob -from youbot import ColorLogger, YoutubeMySqlDatastore +from youbot import ColorLogger, YoutubeMySqlDatastore, DropboxCloudManager from .youtube_api import YoutubeApiV3 logger = ColorLogger('YoutubeManager') class YoutubeManager(YoutubeApiV3): - __slots__ = ('db', 'comments_conf', 'default_sleep_time', 'max_posted_hours', 'api_type', + __slots__ = ('db', 'dbox', 'comments_conf', 'default_sleep_time', 'max_posted_hours', 'api_type', 'template_comments') - def __init__(self, config: Dict, db_conf: Dict, comments_conf: Dict, + def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, 
comments_conf: Dict, sleep_time: int, max_posted_hours: int, api_type: str, tag: str): self.db = YoutubeMySqlDatastore(config=db_conf['config']) + self.dbox = DropboxCloudManager(config=cloud_conf['config']) self.comments_conf = comments_conf['config'] self.default_sleep_time = sleep_time self.max_posted_hours = max_posted_hours @@ -154,9 +155,19 @@ def list_comments(self, n_recent: int = 50, min_likes: int = -1, self.pretty_print(headers, comments) def load_template_comments(self): - if self.comments_conf['type'] == 'local': + # Download files from dropbox + if self.comments_conf['type'] == 'dropbox': + # TODO: implement this in the dropbox lib + if not os.path.exists(self.comments_conf["local_folder_name"]): + os.makedirs(self.comments_conf["local_folder_name"]) + for file in self.dbox.ls(self.comments_conf['dropbox_folder_name']).keys(): + if file[-4:] == '.txt': + self.dbox.download_file(f'{self.comments_conf["dropbox_folder_name"]}/{file}', + f'{self.comments_conf["local_folder_name"]}/{file}') + # Load comments from files + if self.comments_conf['type'] in ('local', 'dropbox'): base_path = os.path.dirname(os.path.abspath(__file__)) - comments_path = os.path.join(base_path, '../..', self.comments_conf['folder_name'], + comments_path = os.path.join(base_path, '../..', self.comments_conf['local_folder_name'], "*.txt") for file in glob(comments_path): file_name = file.split('/')[-1][:-4] From 340e8cce5001fb9ff701cc4e58d8a00d72451c07 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sat, 28 May 2022 20:36:30 -0400 Subject: [PATCH 23/33] Backing up logs to dropbox #4 --- TODO.md | 7 +++---- confs/commenter.yml | 4 +++- youbot/run.py | 2 +- youbot/youtube_utils/youtube_manager.py | 23 +++++++++++++++++++++-- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/TODO.md b/TODO.md index 5c0a6c7..048abc3 100644 --- a/TODO.md +++ b/TODO.md @@ -9,12 +9,11 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. 
- [X] Find a better way to change priorities (probably add a function to push everything) - [X] Create the workflow for the commenter - [X] Roll the comments for each channel - store comments in sql table? -- [ ] Store comments in dropbox -- [ ] Store errors in sql or dropbox +- [X] Store comments in dropbox +- [X] \[Merged\] Regularly backup logs files from logs/ to dropbox (for when running on Heroku) + Store errors in sql or dropbox +- [ ] Send me email on fatal error - [ ] Create the workflow for the accumulator - [ ] Add SQL scripts for creating the tables needed -- [ ] Send me email on fatal error - [ ] Recreate the Livestreaming module - [ ] Improve the YouTube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) - [ ] Use multiple accounts (different api keys) to check for new comments -- [ ] Regularly backup logs files from logs/ to dropbox (for when running on Heroku) diff --git a/confs/commenter.yml b/confs/commenter.yml index d1fc7f2..fc173b5 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -2,6 +2,8 @@ tag: commenter cloudstore: - config: api_key: !ENV ${DROPBOX_API_KEY} + logs_folder_path: /yt-commenter/logs + upload_logs_every: 120 # number of loops in commenter() type: dropbox datastore: - config: @@ -22,7 +24,7 @@ youtube: client_secret: !ENV ${CLIENT_SECRET} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl - sleep_time: 60 + sleep_time: 1 max_posted_hours: 250 # max num. 
of hours to check back for posted videos type: simulated # normal, simulated comments: diff --git a/youbot/run.py b/youbot/run.py index 5330665..63a5207 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -116,7 +116,7 @@ def main(): db_conf=db_conf, cloud_conf=cloud_conf, comments_conf=comments_conf, sleep_time=you_conf['sleep_time'], max_posted_hours=you_conf['max_posted_hours'], - api_type=you_conf['type'], tag=conf_obj.tag) + api_type=you_conf['type'], tag=conf_obj.tag, log_path=args.log) # Run in the specified run mode func = globals()[args.run_mode] func(youtube, args) diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 4a4a24a..522c4fb 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -16,11 +16,11 @@ class YoutubeManager(YoutubeApiV3): __slots__ = ('db', 'dbox', 'comments_conf', 'default_sleep_time', 'max_posted_hours', 'api_type', - 'template_comments') + 'template_comments', 'log_path', 'upload_logs_every') def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, comments_conf: Dict, sleep_time: int, max_posted_hours: int, - api_type: str, tag: str): + api_type: str, tag: str, log_path: str): self.db = YoutubeMySqlDatastore(config=db_conf['config']) self.dbox = DropboxCloudManager(config=cloud_conf['config']) self.comments_conf = comments_conf['config'] @@ -30,14 +30,24 @@ def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, comments_conf: self.template_comments = {} if self.api_type == 'simulated': self.get_uploads = self.simulate_uploads + self.log_path = log_path + self.dbox_logs_folder_path = cloud_conf['logs_folder_path'] + self.upload_logs_every = cloud_conf['upload_logs_every'] super().__init__(config, tag) def commenter(self): # Initialize sleep_time = 0 + loop_cnt = 0 # Start the main loop while True: time.sleep(sleep_time) + # Log upload handling + loop_cnt += 1 + if loop_cnt > self.upload_logs_every: + self.upload_logs() + 
loop_cnt = 0 + # Load necessary data self.load_template_comments() channel_ids = [channel['channel_id'] for channel in self.db.get_channels()] @@ -200,6 +210,15 @@ def get_next_template_comment(self, channel_id: str, commented_comments: Dict) - return comment + def upload_logs(self): + log_name = self.log_path.split(os.sep)[-1][:-4] + day = datetime.today().day + log_name += f'_day{day}.txt' + upload_path = os.path.join(self.dbox_logs_folder_path, log_name) + with open(self.log_path, 'rb') as f: + file_to_upload = f.read() + self.dbox.upload_file(file_bytes=file_to_upload, upload_path=upload_path) + def simulate_uploads(self, channels: List, max_posted_hours: int = 2) -> Dict: """ Generates new uploads for the specified channels. From 24e3d55c149ece7499ac0c88f2d409f44d3781f1 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sat, 28 May 2022 21:34:29 -0400 Subject: [PATCH 24/33] Ensured code works without dropbox and emailer module #4 --- TODO.md | 7 ++++--- confs/commenter.yml | 24 +++++++++++------------ youbot/run.py | 7 ++++++- youbot/youtube_utils/youtube_manager.py | 26 ++++++++++++++++--------- 4 files changed, 39 insertions(+), 25 deletions(-) diff --git a/TODO.md b/TODO.md index 048abc3..5ac33f4 100644 --- a/TODO.md +++ b/TODO.md @@ -11,9 +11,10 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Roll the comments for each channel - store comments in sql table? 
- [X] Store comments in dropbox - [X] \[Merged\] Regularly backup logs files from logs/ to dropbox (for when running on Heroku) + Store errors in sql or dropbox -- [ ] Send me email on fatal error -- [ ] Create the workflow for the accumulator +- [X] Ensure code works without dropbox and emailer modules - [ ] Add SQL scripts for creating the tables needed +- [ ] Create the workflow for the accumulator - [ ] Recreate the Livestreaming module -- [ ] Improve the YouTube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) - [ ] Use multiple accounts (different api keys) to check for new comments +- [ ] Improve the YouTube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) +- [ ] Send me email on fatal error (on later version) diff --git a/confs/commenter.yml b/confs/commenter.yml index fc173b5..1aab5af 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -1,10 +1,4 @@ tag: commenter -cloudstore: - - config: - api_key: !ENV ${DROPBOX_API_KEY} - logs_folder_path: /yt-commenter/logs - upload_logs_every: 120 # number of loops in commenter() - type: dropbox datastore: - config: hostname: !ENV ${MYSQL_HOST} @@ -13,11 +7,6 @@ datastore: db_name: !ENV ${MYSQL_DB_NAME} port: 3306 type: mysql -emailer: - - config: - email_address: !ENV ${EMAIL_ADDRESS} - api_key: !ENV ${GMAIL_API_KEY} - type: gmail youtube: - config: client_id: !ENV ${CLIENT_ID} @@ -31,4 +20,15 @@ comments: - config: local_folder_name: comments dropbox_folder_name: /yt-commenter/comments - type: dropbox # local, dropbox, or mysql \ No newline at end of file + type: local # local, dropbox (should set `cloudstore` config), or mysql (not implemented) +#cloudstore: # Optional +# - config: +# api_key: !ENV ${DROPBOX_API_KEY} +# logs_folder_path: /yt-commenter/logs +# upload_logs_every: 120 # number of loops in commenter() +# type: dropbox +#emailer: # Not implemented yet +# - config: +# email_address: 
!ENV ${EMAIL_ADDRESS} +# api_key: !ENV ${GMAIL_API_KEY} +# type: gmail \ No newline at end of file diff --git a/youbot/run.py b/youbot/run.py index 63a5207..a5a77a5 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -109,8 +109,13 @@ def main(): conf_obj = Configuration(config_src=args.config_file) you_conf = conf_obj.get_config('youtube')[0] db_conf = conf_obj.get_config('datastore')[0] - cloud_conf = conf_obj.get_config('cloudstore')[0] comments_conf = conf_obj.get_config('comments')[0] + cloud_conf = None + if 'cloudstore' in conf_obj.config: + cloud_conf = conf_obj.get_config('cloudstore')[0] + emailer_conf = None + if 'emailer' in conf_obj.config: # Not implemented yet + emailer_conf = conf_obj.get_config('emailer')[0] # Setup YouTube API youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, cloud_conf=cloud_conf, comments_conf=comments_conf, diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 522c4fb..bafc593 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -22,8 +22,15 @@ def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, comments_conf: sleep_time: int, max_posted_hours: int, api_type: str, tag: str, log_path: str): self.db = YoutubeMySqlDatastore(config=db_conf['config']) - self.dbox = DropboxCloudManager(config=cloud_conf['config']) self.comments_conf = comments_conf['config'] + self.dbox = None + if cloud_conf is not None: + self.dbox = DropboxCloudManager(config=cloud_conf['config']) + self.dbox_logs_folder_path = cloud_conf['logs_folder_path'] + self.upload_logs_every = cloud_conf['upload_logs_every'] + elif self.comments_conf['type'] == 'dropbox': + raise YoutubeManagerError("Requested `dropbox` comments type " + "but `cloudstore` config is not set!") self.default_sleep_time = sleep_time self.max_posted_hours = max_posted_hours self.api_type = api_type @@ -31,8 +38,6 @@ def __init__(self, config: Dict, db_conf: Dict, 
cloud_conf: Dict, comments_conf: if self.api_type == 'simulated': self.get_uploads = self.simulate_uploads self.log_path = log_path - self.dbox_logs_folder_path = cloud_conf['logs_folder_path'] - self.upload_logs_every = cloud_conf['upload_logs_every'] super().__init__(config, tag) def commenter(self): @@ -43,10 +48,11 @@ def commenter(self): while True: time.sleep(sleep_time) # Log upload handling - loop_cnt += 1 - if loop_cnt > self.upload_logs_every: - self.upload_logs() - loop_cnt = 0 + if self.dbox is not None: + loop_cnt += 1 + if loop_cnt > self.upload_logs_every: + self.upload_logs() + loop_cnt = 0 # Load necessary data self.load_template_comments() channel_ids = [channel['channel_id'] for channel in @@ -72,7 +78,8 @@ def commenter(self): comments_added.append((video, video_url, comment_text, datetime.utcnow().isoformat())) except Exception as e: - logger.error(f"Exception in the main loop of the Commenter:\n{e}") + error_txt = f"Exception in the main loop of the Commenter:\n{e}" + logger.error(error_txt) sleep_time = self.seconds_until_next_hour() logger.error(f"Will sleep until next hour ({sleep_time} seconds)") else: @@ -83,7 +90,8 @@ def commenter(self): self.db.add_comment(video["channel_id"], video_link=video_url, comment_text=comment_text, upload_time=video["published_at"]) except Exception as e: - logger.error(f"MySQL error while storing comment:\n{e}") + error_txt = f"FatalMySQL error while storing comment:\n{e}" + logger.error(error_txt) raise e def get_comments(self, n_recent, channel_ids): From 9ec91c1feb27bad018fee107b549ad5e19d02d62 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 29 May 2022 10:46:00 -0400 Subject: [PATCH 25/33] Incorporated the config tag in the logger #4 --- TODO.md | 3 ++- confs/commenter.yml | 18 +++++++++--------- youbot/run.py | 7 +++++-- youbot/youtube_utils/youtube_api.py | 4 +++- youbot/youtube_utils/youtube_manager.py | 8 +++++--- youbot/yt_mysql.py | 8 +++++--- 6 files changed, 29 insertions(+), 19 deletions(-) 
diff --git a/TODO.md b/TODO.md index 5ac33f4..39a008c 100644 --- a/TODO.md +++ b/TODO.md @@ -12,8 +12,9 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Store comments in dropbox - [X] \[Merged\] Regularly backup logs files from logs/ to dropbox (for when running on Heroku) + Store errors in sql or dropbox - [X] Ensure code works without dropbox and emailer modules -- [ ] Add SQL scripts for creating the tables needed - [ ] Create the workflow for the accumulator +- [ ] Add SQL scripts for creating the tables needed +- [ ] Update Readme - [ ] Recreate the Livestreaming module - [ ] Use multiple accounts (different api keys) to check for new comments - [ ] Improve the YouTube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) diff --git a/confs/commenter.yml b/confs/commenter.yml index 1aab5af..ee08c2f 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -13,20 +13,20 @@ youtube: client_secret: !ENV ${CLIENT_SECRET} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl - sleep_time: 1 - max_posted_hours: 250 # max num. of hours to check back for posted videos + sleep_time: !ENV ${SLEEP_TIME} + max_posted_hours: !ENV ${MAX_POSTED_HOURS} # max num. 
of hours to check back for posted videos type: simulated # normal, simulated comments: - config: local_folder_name: comments dropbox_folder_name: /yt-commenter/comments - type: local # local, dropbox (should set `cloudstore` config), or mysql (not implemented) -#cloudstore: # Optional -# - config: -# api_key: !ENV ${DROPBOX_API_KEY} -# logs_folder_path: /yt-commenter/logs -# upload_logs_every: 120 # number of loops in commenter() -# type: dropbox + type: !ENV ${COMMENTS_TYPE} # local, dropbox (should set `cloudstore` config), or mysql (not implemented) +cloudstore: # Optional + - config: + api_key: !ENV ${DROPBOX_API_KEY} + logs_folder_path: /yt-commenter/logs + upload_logs_every: !ENV ${UPLOAD_LOGS_EVERY} # number of loops in commenter() + type: dropbox #emailer: # Not implemented yet # - config: # email_address: !ENV ${EMAIL_ADDRESS} diff --git a/youbot/run.py b/youbot/run.py index a5a77a5..a587c4e 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -101,12 +101,15 @@ def main(): Example: python youbot/run.py -m commenter -c confs/commenter.yml -l logs/commenter.log """ + global logger # Initializing args = get_args() ColorLogger.setup_logger(log_path=args.log, debug=args.debug, clear_log=False) # Load the configurations conf_obj = Configuration(config_src=args.config_file) + tag = conf_obj.tag + logger = ColorLogger(logger_name=f'[{tag}] Main', color='yellow') you_conf = conf_obj.get_config('youtube')[0] db_conf = conf_obj.get_config('datastore')[0] comments_conf = conf_obj.get_config('comments')[0] @@ -119,8 +122,8 @@ def main(): # Setup YouTube API youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, cloud_conf=cloud_conf, comments_conf=comments_conf, - sleep_time=you_conf['sleep_time'], - max_posted_hours=you_conf['max_posted_hours'], + sleep_time=int(you_conf['sleep_time']), + max_posted_hours=int(you_conf['max_posted_hours']), api_type=you_conf['type'], tag=conf_obj.tag, log_path=args.log) # Run in the specified run mode func = 
globals()[args.run_mode] diff --git a/youbot/youtube_utils/youtube_api.py b/youbot/youtube_utils/youtube_api.py index bf564ad..50a7870 100644 --- a/youbot/youtube_utils/youtube_api.py +++ b/youbot/youtube_utils/youtube_api.py @@ -14,7 +14,7 @@ from youbot import ColorLogger -logger = ColorLogger('YoutubeApi') +logger = ColorLogger(logger_name='YoutubeApi', color='green') class AbstractYoutubeApi(ABC): @@ -45,6 +45,8 @@ def _get_my_username_and_id(self) -> str: class YoutubeApiV3(AbstractYoutubeApi): def __init__(self, config: Dict, tag: str): + global logger + logger = ColorLogger(logger_name=f'[{tag}] YoutubeApi', color='green') super().__init__(config, tag) @staticmethod diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index bafc593..cc4ac3c 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -11,7 +11,7 @@ from youbot import ColorLogger, YoutubeMySqlDatastore, DropboxCloudManager from .youtube_api import YoutubeApiV3 -logger = ColorLogger('YoutubeManager') +logger = ColorLogger(logger_name='YoutubeManager', color='cyan') class YoutubeManager(YoutubeApiV3): @@ -21,13 +21,15 @@ class YoutubeManager(YoutubeApiV3): def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, comments_conf: Dict, sleep_time: int, max_posted_hours: int, api_type: str, tag: str, log_path: str): - self.db = YoutubeMySqlDatastore(config=db_conf['config']) + global logger + logger = ColorLogger(logger_name=f'[{tag}] YoutubeManager', color='cyan') + self.db = YoutubeMySqlDatastore(config=db_conf['config'], tag=tag) self.comments_conf = comments_conf['config'] self.dbox = None if cloud_conf is not None: self.dbox = DropboxCloudManager(config=cloud_conf['config']) self.dbox_logs_folder_path = cloud_conf['logs_folder_path'] - self.upload_logs_every = cloud_conf['upload_logs_every'] + self.upload_logs_every = int(cloud_conf['upload_logs_every']) elif self.comments_conf['type'] == 'dropbox': 
raise YoutubeManagerError("Requested `dropbox` comments type " "but `cloudstore` config is not set!") diff --git a/youbot/yt_mysql.py b/youbot/yt_mysql.py index 1454cdd..9a12768 100644 --- a/youbot/yt_mysql.py +++ b/youbot/yt_mysql.py @@ -2,19 +2,21 @@ from typing import * from datetime import datetime -logger = ColorLogger('YoutubeMySqlDatastore') +logger = ColorLogger(logger_name='YoutubeMySqlDatastore', color='red') class YoutubeMySqlDatastore(HighMySQL): CHANNEL_TABLE = 'channels' COMMENTS_TABLE = 'comments' - def __init__(self, config: Dict) -> None: + def __init__(self, config: Dict, tag: str) -> None: """ The basic constructor. Creates a new instance of Datastore using the specified credentials :param config: + :param tag: """ - + global logger + logger = ColorLogger(logger_name=f'[{tag}] YoutubeMySqlDatastore', color='red') super().__init__(config) self.create_tables_if_not_exist() From 59af4d9ca6e4abcd7535844d93b9a3b35e33b92d Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 29 May 2022 11:51:21 -0400 Subject: [PATCH 26/33] Accumulator finished #4 --- TODO.md | 3 +- confs/accumulator.yml | 17 +++++++++ youbot/run.py | 18 +++++---- youbot/youtube_utils/youtube_api.py | 2 +- youbot/youtube_utils/youtube_manager.py | 49 +++++++++++++++++++++++-- 5 files changed, 76 insertions(+), 13 deletions(-) create mode 100644 confs/accumulator.yml diff --git a/TODO.md b/TODO.md index 39a008c..1132e37 100644 --- a/TODO.md +++ b/TODO.md @@ -12,7 +12,8 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. 
- [X] Store comments in dropbox - [X] \[Merged\] Regularly backup logs files from logs/ to dropbox (for when running on Heroku) + Store errors in sql or dropbox - [X] Ensure code works without dropbox and emailer modules -- [ ] Create the workflow for the accumulator +- [X] Create the workflow for the accumulator +- [ ] Load yt keys from Dropbox - [ ] Add SQL scripts for creating the tables needed - [ ] Update Readme - [ ] Recreate the Livestreaming module diff --git a/confs/accumulator.yml b/confs/accumulator.yml new file mode 100644 index 0000000..2ed18ab --- /dev/null +++ b/confs/accumulator.yml @@ -0,0 +1,17 @@ +tag: accumulator +datastore: + - config: + hostname: !ENV ${MYSQL_HOST} + username: !ENV ${MYSQL_USERNAME} + password: !ENV ${MYSQL_PASSWORD} + db_name: !ENV ${MYSQL_DB_NAME} + port: 3306 + type: mysql +youtube: + - config: + client_id: !ENV ${CLIENT_ID} + client_secret: !ENV ${CLIENT_SECRET} + api_version: v3 + read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl + sleep_time: !ENV ${SLEEP_TIME} + type: simulated # normal, simulated \ No newline at end of file diff --git a/youbot/run.py b/youbot/run.py index a587c4e..29a9825 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -66,7 +66,7 @@ def commenter(youtube: YoutubeManager, args: argparse.Namespace) -> None: def accumulator(youtube: YoutubeManager, args: argparse.Namespace) -> None: - raise NotImplementedError() + youtube.accumulator() def set_priority(youtube: YoutubeManager, args: argparse.Namespace) -> None: @@ -106,15 +106,19 @@ def main(): # Initializing args = get_args() ColorLogger.setup_logger(log_path=args.log, debug=args.debug, clear_log=False) - # Load the configurations + # Load configurations conf_obj = Configuration(config_src=args.config_file) tag = conf_obj.tag - logger = ColorLogger(logger_name=f'[{tag}] Main', color='yellow') + logger = ColorLogger(logger_name=f'[{tag}] Main', color='yellow') # Reconfigure with the tag you_conf = conf_obj.get_config('youtube')[0] + 
sleep_time = int(you_conf['sleep_time']) + max_posted_hours = int(you_conf['max_posted_hours']) if 'max_posted_hours' in you_conf else -1 db_conf = conf_obj.get_config('datastore')[0] - comments_conf = conf_obj.get_config('comments')[0] + comments_conf = None + if 'comments' in conf_obj.config: # Optional + comments_conf = conf_obj.get_config('comments')[0] cloud_conf = None - if 'cloudstore' in conf_obj.config: + if 'cloudstore' in conf_obj.config: # Optional cloud_conf = conf_obj.get_config('cloudstore')[0] emailer_conf = None if 'emailer' in conf_obj.config: # Not implemented yet @@ -122,8 +126,8 @@ def main(): # Setup YouTube API youtube = YoutubeManager(config=you_conf['config'], db_conf=db_conf, cloud_conf=cloud_conf, comments_conf=comments_conf, - sleep_time=int(you_conf['sleep_time']), - max_posted_hours=int(you_conf['max_posted_hours']), + sleep_time=sleep_time, + max_posted_hours=max_posted_hours, api_type=you_conf['type'], tag=conf_obj.tag, log_path=args.log) # Run in the specified run mode func = globals()[args.run_mode] diff --git a/youbot/youtube_utils/youtube_api.py b/youbot/youtube_utils/youtube_api.py index 50a7870..9ff3ed6 100644 --- a/youbot/youtube_utils/youtube_api.py +++ b/youbot/youtube_utils/youtube_api.py @@ -53,7 +53,7 @@ def __init__(self, config: Dict, tag: str): def _build_api(client_id: str, client_secret: str, api_version: str, read_only_scope: str, tag: str) -> googleapiclient.discovery.Resource: """ - Build a youtube api connection. + Build a YouTube api connection. 
Args: client_id: diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index cc4ac3c..1e67ddd 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -24,15 +24,18 @@ def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, comments_conf: global logger logger = ColorLogger(logger_name=f'[{tag}] YoutubeManager', color='cyan') self.db = YoutubeMySqlDatastore(config=db_conf['config'], tag=tag) - self.comments_conf = comments_conf['config'] + self.comments_conf = None + if comments_conf is not None: + self.comments_conf = comments_conf['config'] self.dbox = None if cloud_conf is not None: self.dbox = DropboxCloudManager(config=cloud_conf['config']) self.dbox_logs_folder_path = cloud_conf['logs_folder_path'] self.upload_logs_every = int(cloud_conf['upload_logs_every']) - elif self.comments_conf['type'] == 'dropbox': - raise YoutubeManagerError("Requested `dropbox` comments type " - "but `cloudstore` config is not set!") + elif self.comments_conf is not None: + if self.comments_conf['type'] == 'dropbox': + raise YoutubeManagerError("Requested `dropbox` comments type " + "but `cloudstore` config is not set!") self.default_sleep_time = sleep_time self.max_posted_hours = max_posted_hours self.api_type = api_type @@ -96,6 +99,41 @@ def commenter(self): logger.error(error_txt) raise e + def accumulator(self): + # Initialize + sleep_time = 0 + while True: + try: + time.sleep(sleep_time) + # Load recent comments + recent_commented_links = [comment["video_link"] for comment in + self.db.get_comments(n_recent=200)] + # Get info for recent comments with YT api + comments = [] + exceptions = [] + for cnt, link in enumerate(recent_commented_links): + try: + comments.extend(self.get_video_comments(link)) + except Exception as e: + exceptions.append(e) + # Update comment data in the DB + for comment_dict in comments: + self.db.update_comment(video_link=comment_dict['url'], + 
comment_id=comment_dict['comment_id'], + like_cnt=comment_dict['like_count'], + reply_cnt=comment_dict['reply_count']) + if len(exceptions) > 0: + logger.error(f"{len(exceptions)} exceptions occurred! " + f"Will only print the first one.") + raise exceptions[0] + except Exception as e: + error_txt = f"Exception in the main loop of the Accumulator:\n{e}" + logger.error(error_txt) + sleep_time = self.seconds_until_next_hour() + logger.error(f"Will sleep until next hour ({sleep_time} seconds)") + else: + sleep_time = self.default_sleep_time + def get_comments(self, n_recent, channel_ids): commented_comments = {} video_links_commented = [] @@ -175,6 +213,9 @@ def list_comments(self, n_recent: int = 50, min_likes: int = -1, self.pretty_print(headers, comments) def load_template_comments(self): + if self.comments_conf is None: + raise YoutubeManagerError("Tried to load template comments " + "but `comments` is not set in the config!") # Download files from dropbox if self.comments_conf['type'] == 'dropbox': # TODO: implement this in the dropbox lib From 9a1cfbc5527854ba9615fefc86681f5045a21de2 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 29 May 2022 12:58:52 -0400 Subject: [PATCH 27/33] Loading keys from dropbox. Tweaked the YML structure #4 --- TODO.md | 2 +- confs/accumulator.yml | 12 ++++++-- confs/commenter.yml | 13 +++++---- confs/generic.yml | 24 +++++++++++++++ youbot/run.py | 8 +++-- youbot/youtube_utils/youtube_api.py | 7 ++++- youbot/youtube_utils/youtube_manager.py | 39 +++++++++++++++++++------ 7 files changed, 84 insertions(+), 21 deletions(-) create mode 100644 confs/generic.yml diff --git a/TODO.md b/TODO.md index 1132e37..66d8d78 100644 --- a/TODO.md +++ b/TODO.md @@ -13,7 +13,7 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. 
- [X] \[Merged\] Regularly backup logs files from logs/ to dropbox (for when running on Heroku) + Store errors in sql or dropbox - [X] Ensure code works without dropbox and emailer modules - [X] Create the workflow for the accumulator -- [ ] Load yt keys from Dropbox +- [X] Load yt keys from Dropbox - [ ] Add SQL scripts for creating the tables needed - [ ] Update Readme - [ ] Recreate the Livestreaming module diff --git a/confs/accumulator.yml b/confs/accumulator.yml index 2ed18ab..f961720 100644 --- a/confs/accumulator.yml +++ b/confs/accumulator.yml @@ -13,5 +13,13 @@ youtube: client_secret: !ENV ${CLIENT_SECRET} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl - sleep_time: !ENV ${SLEEP_TIME} - type: simulated # normal, simulated \ No newline at end of file + sleep_time: !ENV ${SLEEP_TIME} + load_keys_from_cloud: true # cloudstore config is required + keys_path: keys + type: simulated # normal, simulated +cloudstore: # Optional + - config: + api_key: !ENV ${DROPBOX_API_KEY} + logs_folder_path: /yt-commenter/logs + keys_folder_path: /yt-commenter/keys + type: dropbox \ No newline at end of file diff --git a/confs/commenter.yml b/confs/commenter.yml index ee08c2f..3a5615f 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -13,19 +13,22 @@ youtube: client_secret: !ENV ${CLIENT_SECRET} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl - sleep_time: !ENV ${SLEEP_TIME} - max_posted_hours: !ENV ${MAX_POSTED_HOURS} # max num. of hours to check back for posted videos + load_keys_from_cloud: true # cloudstore config is required + keys_path: keys + sleep_time: !ENV ${SLEEP_TIME} + max_posted_hours: !ENV ${MAX_POSTED_HOURS} # max num. 
of hours to check back for posted videos type: simulated # normal, simulated comments: - config: local_folder_name: comments dropbox_folder_name: /yt-commenter/comments - type: !ENV ${COMMENTS_TYPE} # local, dropbox (should set `cloudstore` config), or mysql (not implemented) + type: !ENV ${COMMENTS_TYPE} # local, dropbox (should set `cloudstore` config), or mysql (not implemented) cloudstore: # Optional - config: api_key: !ENV ${DROPBOX_API_KEY} - logs_folder_path: /yt-commenter/logs - upload_logs_every: !ENV ${UPLOAD_LOGS_EVERY} # number of loops in commenter() + logs_folder_path: /yt-commenter/logs + keys_folder_path: /yt-commenter/keys + upload_logs_every: !ENV ${UPLOAD_LOGS_EVERY} # number of loops in commenter() type: dropbox #emailer: # Not implemented yet # - config: diff --git a/confs/generic.yml b/confs/generic.yml new file mode 100644 index 0000000..f6d8a50 --- /dev/null +++ b/confs/generic.yml @@ -0,0 +1,24 @@ +tag: generic +datastore: + - config: + hostname: !ENV ${MYSQL_HOST} + username: !ENV ${MYSQL_USERNAME} + password: !ENV ${MYSQL_PASSWORD} + db_name: !ENV ${MYSQL_DB_NAME} + port: 3306 + type: mysql +youtube: + - config: + client_id: !ENV ${CLIENT_ID} + client_secret: !ENV ${CLIENT_SECRET} + api_version: v3 + read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl + load_keys_from_cloud: true # cloudstore config is required + keys_path: keys + type: simulated # normal, simulated +cloudstore: # Optional + - config: + api_key: !ENV ${DROPBOX_API_KEY} + logs_folder_path: /yt-commenter/logs + keys_folder_path: /yt-commenter/keys + type: dropbox \ No newline at end of file diff --git a/youbot/run.py b/youbot/run.py index 29a9825..fe1dd47 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -109,10 +109,12 @@ def main(): # Load configurations conf_obj = Configuration(config_src=args.config_file) tag = conf_obj.tag - logger = ColorLogger(logger_name=f'[{tag}] Main', color='yellow') # Reconfigure with the tag + logger = 
ColorLogger(logger_name=f'[{tag}] Main', color='yellow') # Reconfigures it with the tag you_conf = conf_obj.get_config('youtube')[0] - sleep_time = int(you_conf['sleep_time']) - max_posted_hours = int(you_conf['max_posted_hours']) if 'max_posted_hours' in you_conf else -1 + sleep_time = int(you_conf['config']['sleep_time']) \ + if 'sleep_time' in you_conf else 120 + max_posted_hours = int(you_conf['config']['max_posted_hours']) \ + if 'max_posted_hours' in you_conf else 24 db_conf = conf_obj.get_config('datastore')[0] comments_conf = None if 'comments' in conf_obj.config: # Optional diff --git a/youbot/youtube_utils/youtube_api.py b/youbot/youtube_utils/youtube_api.py index 9ff3ed6..1a88fd4 100644 --- a/youbot/youtube_utils/youtube_api.py +++ b/youbot/youtube_utils/youtube_api.py @@ -29,7 +29,12 @@ def __init__(self, config: Dict, tag: str) -> None: """ self.tag = tag - self._api = self._build_api(**config, tag=self.tag) + self._api = self._build_api( + client_id=config['client_id'], + client_secret=config['client_secret'], + api_version=config['api_version'], + read_only_scope=config['read_only_scope'], + tag=self.tag) self.channel_name, self.channel_id = self._get_my_username_and_id() @staticmethod diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 1e67ddd..87a4543 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -16,7 +16,8 @@ class YoutubeManager(YoutubeApiV3): __slots__ = ('db', 'dbox', 'comments_conf', 'default_sleep_time', 'max_posted_hours', 'api_type', - 'template_comments', 'log_path', 'upload_logs_every') + 'template_comments', 'log_path', 'upload_logs_every', 'keys_path', + 'dbox_logs_folder_path', 'dbox_keys_folder_path', 'comments_src') def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, comments_conf: Dict, sleep_time: int, max_posted_hours: int, @@ -26,14 +27,18 @@ def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, 
comments_conf: self.db = YoutubeMySqlDatastore(config=db_conf['config'], tag=tag) self.comments_conf = None if comments_conf is not None: + self.comments_src = comments_conf['type'] self.comments_conf = comments_conf['config'] self.dbox = None if cloud_conf is not None: - self.dbox = DropboxCloudManager(config=cloud_conf['config']) + cloud_conf = cloud_conf['config'] + self.dbox = DropboxCloudManager(config=cloud_conf) self.dbox_logs_folder_path = cloud_conf['logs_folder_path'] - self.upload_logs_every = int(cloud_conf['upload_logs_every']) + self.dbox_keys_folder_path = cloud_conf['keys_folder_path'] + self.upload_logs_every = int( + cloud_conf['upload_logs_every']) if 'upload_logs_every' in cloud_conf else 100 elif self.comments_conf is not None: - if self.comments_conf['type'] == 'dropbox': + if self.comments_src == 'dropbox': raise YoutubeManagerError("Requested `dropbox` comments type " "but `cloudstore` config is not set!") self.default_sleep_time = sleep_time @@ -42,7 +47,11 @@ def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, comments_conf: self.template_comments = {} if self.api_type == 'simulated': self.get_uploads = self.simulate_uploads + self.keys_path = config['keys_path'] self.log_path = log_path + if 'load_keys_from_cloud' in config: + if config['load_keys_from_cloud'] is True: + self.load_keys_from_cloud() super().__init__(config, tag) def commenter(self): @@ -204,9 +213,9 @@ def list_channels(self) -> None: def list_comments(self, n_recent: int = 50, min_likes: int = -1, min_replies: int = -1) -> None: - comments = [(row["username"].title(), row["comment"], + comments = [[row["username"].title(), row["comment"], arrow.get(row["comment_time"]).humanize(), - row["like_count"], row["reply_count"], row["comment_link"]) + row["like_count"], row["reply_count"], row["comment_link"]] for row in self.db.get_comments(n_recent, min_likes, min_replies)] headers = ['Channel', 'Comment', 'Time', 'Likes', 'Replies', 'Comment URL'] @@ -217,7 
+226,7 @@ def load_template_comments(self): raise YoutubeManagerError("Tried to load template comments " "but `comments` is not set in the config!") # Download files from dropbox - if self.comments_conf['type'] == 'dropbox': + if self.comments_src == 'dropbox': # TODO: implement this in the dropbox lib if not os.path.exists(self.comments_conf["local_folder_name"]): os.makedirs(self.comments_conf["local_folder_name"]) @@ -226,7 +235,7 @@ def load_template_comments(self): self.dbox.download_file(f'{self.comments_conf["dropbox_folder_name"]}/{file}', f'{self.comments_conf["local_folder_name"]}/{file}') # Load comments from files - if self.comments_conf['type'] in ('local', 'dropbox'): + if self.comments_src in ('local', 'dropbox'): base_path = os.path.dirname(os.path.abspath(__file__)) comments_path = os.path.join(base_path, '../..', self.comments_conf['local_folder_name'], "*.txt") @@ -270,6 +279,18 @@ def upload_logs(self): file_to_upload = f.read() self.dbox.upload_file(file_bytes=file_to_upload, upload_path=upload_path) + def load_keys_from_cloud(self): + if self.dbox is None: + raise YoutubeManagerError("`load_keys_from_cloud` was set to True " + "but no `cloudstore` config was given!") + + if not os.path.exists(self.keys_path): + os.makedirs(self.keys_path) + for file in self.dbox.ls(self.dbox_keys_folder_path).keys(): + if file[-5:] == '.json': + self.dbox.download_file(f'{self.dbox_keys_folder_path}/{file}', + f'{self.keys_path}/{file}') + def simulate_uploads(self, channels: List, max_posted_hours: int = 2) -> Dict: """ Generates new uploads for the specified channels. 
@@ -301,7 +322,7 @@ def simulate_uploads(self, channels: List, max_posted_hours: int = 2) -> Dict: yield upload @staticmethod - def pretty_print(headers: List[str], data: List[Tuple]): + def pretty_print(headers: List[str], data: List[List]): """Print the provided header and data in a visually pleasing manner Args: From ca0c8c1b7c41b617947a231ef6e997eb99d632dc Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 29 May 2022 13:35:08 -0400 Subject: [PATCH 28/33] Tested the create_tables function, using different env YT vars for each config #4 --- TODO.md | 6 +++++- confs/accumulator.yml | 8 ++++---- confs/commenter.yml | 10 +++++----- confs/generic.yml | 6 +++--- youbot/yt_mysql.py | 1 + 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/TODO.md b/TODO.md index 66d8d78..28b407a 100644 --- a/TODO.md +++ b/TODO.md @@ -14,7 +14,11 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Ensure code works without dropbox and emailer modules - [X] Create the workflow for the accumulator - [X] Load yt keys from Dropbox -- [ ] Add SQL scripts for creating the tables needed +- [X] Add SQL scripts for creating the tables needed (automatically checks and creates on init) +- [X] Different YT env vars for each yml +- [ ] Option to set username manually +- [ ] Test that everything works properly +- [ ] Configure Procfile and circleci config - [ ] Update Readme - [ ] Recreate the Livestreaming module - [ ] Use multiple accounts (different api keys) to check for new comments diff --git a/confs/accumulator.yml b/confs/accumulator.yml index f961720..2412523 100644 --- a/confs/accumulator.yml +++ b/confs/accumulator.yml @@ -9,14 +9,14 @@ datastore: type: mysql youtube: - config: - client_id: !ENV ${CLIENT_ID} - client_secret: !ENV ${CLIENT_SECRET} + client_id: !ENV ${CLIENT_ID_ACC} + client_secret: !ENV ${CLIENT_SECRET_ACC} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl - sleep_time: !ENV ${SLEEP_TIME} + sleep_time: !ENV 
${SLEEP_TIME_ACC} load_keys_from_cloud: true # cloudstore config is required keys_path: keys - type: simulated # normal, simulated + type: normal # normal, simulated cloudstore: # Optional - config: api_key: !ENV ${DROPBOX_API_KEY} diff --git a/confs/commenter.yml b/confs/commenter.yml index 3a5615f..9eaffbc 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -9,15 +9,15 @@ datastore: type: mysql youtube: - config: - client_id: !ENV ${CLIENT_ID} - client_secret: !ENV ${CLIENT_SECRET} + client_id: !ENV ${CLIENT_ID_COMM} + client_secret: !ENV ${CLIENT_SECRET_COMM} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl load_keys_from_cloud: true # cloudstore config is required keys_path: keys - sleep_time: !ENV ${SLEEP_TIME} - max_posted_hours: !ENV ${MAX_POSTED_HOURS} # max num. of hours to check back for posted videos - type: simulated # normal, simulated + sleep_time: !ENV ${SLEEP_TIME_COMM} + max_posted_hours: !ENV ${MAX_POSTED_HOURS_COMM} # max num. 
of hours to check back for posted videos + type: normal # normal, simulated comments: - config: local_folder_name: comments diff --git a/confs/generic.yml b/confs/generic.yml index f6d8a50..606a106 100644 --- a/confs/generic.yml +++ b/confs/generic.yml @@ -9,13 +9,13 @@ datastore: type: mysql youtube: - config: - client_id: !ENV ${CLIENT_ID} - client_secret: !ENV ${CLIENT_SECRET} + client_id: !ENV ${CLIENT_ID_GEN} + client_secret: !ENV ${CLIENT_SECRET_GEN} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl load_keys_from_cloud: true # cloudstore config is required keys_path: keys - type: simulated # normal, simulated + type: normal # normal, simulated cloudstore: # Optional - config: api_key: !ENV ${DROPBOX_API_KEY} diff --git a/youbot/yt_mysql.py b/youbot/yt_mysql.py index 9a12768..a3b19b6 100644 --- a/youbot/yt_mysql.py +++ b/youbot/yt_mysql.py @@ -39,6 +39,7 @@ def create_tables_if_not_exist(self): video_link varchar(100) not null, comment varchar(255) not null, comment_time varchar(100) not null, + upload_time varchar(100) not null, like_count int default -1 null, reply_count int default -1 null, comment_id varchar(100) default '-1' null, From dd9d274bf2b0bc3e05d5f37feb1d224746734fcf Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 29 May 2022 14:34:09 -0400 Subject: [PATCH 29/33] Fixed minor bugs #4 --- TODO.md | 4 ++-- confs/accumulator.yml | 2 ++ confs/commenter.yml | 1 + confs/generic.yml | 1 + youbot/run.py | 4 ++-- youbot/youtube_utils/youtube_api.py | 5 ++--- youbot/youtube_utils/youtube_manager.py | 14 +++++++++++--- 7 files changed, 21 insertions(+), 10 deletions(-) diff --git a/TODO.md b/TODO.md index 28b407a..b08fa59 100644 --- a/TODO.md +++ b/TODO.md @@ -16,8 +16,8 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. 
- [X] Load yt keys from Dropbox - [X] Add SQL scripts for creating the tables needed (automatically checks and creates on init) - [X] Different YT env vars for each yml -- [ ] Option to set username manually -- [ ] Test that everything works properly +- [X] Option to set username manually +- [X] Test that everything works properly - [ ] Configure Procfile and circleci config - [ ] Update Readme - [ ] Recreate the Livestreaming module diff --git a/confs/accumulator.yml b/confs/accumulator.yml index 2412523..db9f9fb 100644 --- a/confs/accumulator.yml +++ b/confs/accumulator.yml @@ -13,6 +13,8 @@ youtube: client_secret: !ENV ${CLIENT_SECRET_ACC} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl + username: !ENV ${USERNAME_ACC} # Can be omitted (automatically derived) + comment_search_term: !ENV ${SEARCH_TERM_ACC} # Can be omitted (username will be used instead - sometimes doesn't work) sleep_time: !ENV ${SLEEP_TIME_ACC} load_keys_from_cloud: true # cloudstore config is required keys_path: keys diff --git a/confs/commenter.yml b/confs/commenter.yml index 9eaffbc..f7c2991 100644 --- a/confs/commenter.yml +++ b/confs/commenter.yml @@ -13,6 +13,7 @@ youtube: client_secret: !ENV ${CLIENT_SECRET_COMM} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl + username: !ENV ${USERNAME_COMM} # Can be omitted (automatically derived) load_keys_from_cloud: true # cloudstore config is required keys_path: keys sleep_time: !ENV ${SLEEP_TIME_COMM} diff --git a/confs/generic.yml b/confs/generic.yml index 606a106..5ce3e36 100644 --- a/confs/generic.yml +++ b/confs/generic.yml @@ -13,6 +13,7 @@ youtube: client_secret: !ENV ${CLIENT_SECRET_GEN} api_version: v3 read_only_scope: https://www.googleapis.com/auth/youtube.force-ssl + username: !ENV ${USERNAME_GEN} # Can be omitted (automatically derived) load_keys_from_cloud: true # cloudstore config is required keys_path: keys type: normal # normal, simulated diff --git 
a/youbot/run.py b/youbot/run.py index fe1dd47..a168e09 100644 --- a/youbot/run.py +++ b/youbot/run.py @@ -112,9 +112,9 @@ def main(): logger = ColorLogger(logger_name=f'[{tag}] Main', color='yellow') # Reconfigures it with the tag you_conf = conf_obj.get_config('youtube')[0] sleep_time = int(you_conf['config']['sleep_time']) \ - if 'sleep_time' in you_conf else 120 + if 'sleep_time' in you_conf['config'] else 120 max_posted_hours = int(you_conf['config']['max_posted_hours']) \ - if 'max_posted_hours' in you_conf else 24 + if 'max_posted_hours' in you_conf['config'] else 24 db_conf = conf_obj.get_config('datastore')[0] comments_conf = None if 'comments' in conf_obj.config: # Optional diff --git a/youbot/youtube_utils/youtube_api.py b/youbot/youtube_utils/youtube_api.py index 1a88fd4..a4547e4 100644 --- a/youbot/youtube_utils/youtube_api.py +++ b/youbot/youtube_utils/youtube_api.py @@ -105,9 +105,8 @@ def comment(self, video_id: str, comment_text: str) -> None: properties = {'snippet.channelId': self.channel_id, 'snippet.videoId': video_id, 'snippet.topLevelComment.snippet.textOriginal': comment_text} - # self._comment_threads_insert(properties=properties, - # part='snippet') - # TODO: uncomment this when commenter is done + self._comment_threads_insert(properties=properties, + part='snippet') except Exception as exc: logger.error(f"An error occurred:\n{exc}") diff --git a/youbot/youtube_utils/youtube_manager.py b/youbot/youtube_utils/youtube_manager.py index 87a4543..49c451b 100644 --- a/youbot/youtube_utils/youtube_manager.py +++ b/youbot/youtube_utils/youtube_manager.py @@ -17,7 +17,8 @@ class YoutubeManager(YoutubeApiV3): __slots__ = ('db', 'dbox', 'comments_conf', 'default_sleep_time', 'max_posted_hours', 'api_type', 'template_comments', 'log_path', 'upload_logs_every', 'keys_path', - 'dbox_logs_folder_path', 'dbox_keys_folder_path', 'comments_src') + 'dbox_logs_folder_path', 'dbox_keys_folder_path', 'comments_src', + 'comment_search_term') def __init__(self, 
config: Dict, db_conf: Dict, cloud_conf: Dict, comments_conf: Dict, sleep_time: int, max_posted_hours: int, @@ -49,10 +50,15 @@ def __init__(self, config: Dict, db_conf: Dict, cloud_conf: Dict, comments_conf: self.get_uploads = self.simulate_uploads self.keys_path = config['keys_path'] self.log_path = log_path + self.comment_search_term = None + if 'comment_search_term' in config: + self.comment_search_term = config['comment_search_term'] if 'load_keys_from_cloud' in config: if config['load_keys_from_cloud'] is True: self.load_keys_from_cloud() super().__init__(config, tag) + if 'username' in config: + self.channel_name = config['username'] def commenter(self): # Initialize @@ -87,7 +93,7 @@ def commenter(self): comment_text = \ self.get_next_template_comment(channel_id=video["channel_id"], commented_comments=commented_comments) - # self.comment(video_id=video["id"], comment_text=comment_text) + self.comment(video_id=video["id"], comment_text=comment_text) # Add the info of the new comment to be added in the DB after this loop comments_added.append((video, video_url, comment_text, datetime.utcnow().isoformat())) @@ -103,6 +109,7 @@ def commenter(self): for (video, video_url, comment_text, comment_time) in comments_added: self.db.add_comment(video["channel_id"], video_link=video_url, comment_text=comment_text, upload_time=video["published_at"]) + logger.info(f"Comment Added to Channel: {video['channel_id']} ({video_url})") except Exception as e: error_txt = f"FatalMySQL error while storing comment:\n{e}" logger.error(error_txt) @@ -122,7 +129,8 @@ def accumulator(self): exceptions = [] for cnt, link in enumerate(recent_commented_links): try: - comments.extend(self.get_video_comments(link)) + comments.extend(self.get_video_comments(url=link, + search_terms=self.comment_search_term)) except Exception as e: exceptions.append(e) # Update comment data in the DB From af795f884a551eb9e99b2acf3857ca35c080d266 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 29 May 2022 
14:42:37 -0400 Subject: [PATCH 30/33] Configured Procfile and circleci config #4 --- .circleci/config.yml | 9 +++++---- Procfile | 9 +++++++-- TODO.md | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4712a63..257ac44 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -8,7 +8,8 @@ jobs: # A basic unit of work in a run - image: circleci/python:3.8 steps: # steps that comprise the `build` job - checkout # check out source code to working directory - - run: make clean server=circleci - - run: make create_env server=circleci - - run: make setup server=circleci - - run: make run_tests server=circleci + - run: make clean env=venv + - run: make create_env env=venv + - run: make requirements env=venv + - run: make setup env=venv + - run: make run_tests env=venv diff --git a/Procfile b/Procfile index 13a2f3b..8e501a5 100644 --- a/Procfile +++ b/Procfile @@ -1,2 +1,7 @@ -run_tests: make run_tests -main: python youbot/run.py -m commenter -c confs/commenter.yml -l logs/commenter.log \ No newline at end of file +commenter: python youbot/run.py -c confs/commenter.yml -l logs/commenter.log -m commenter +accumulator: python youbot/run.py -c confs/accumulator.yml -l logs/accumulator.log -m accumulator +list_comments: python youbot/run.py -c confs/generic.yml -l logs/generic.log -m list_comments --n-recent 10 +list_channels: python youbot/run.py -c confs/generic.yml -l logs/generic.log -m list_channels +add_channel: python youbot/run.py -c confs/generic.yml -l logs/generic.log -m add_channel -i UC-lHJZR3Gqxm24_Vd_AJ5Yw +set_priority: python youbot/run.py -c confs/generic.yml -l logs/generic.log -m set_priority --priority 1 -i UC-ImLFXGIe2FC4Wo5hOodnw +refresh_photos: python youbot/run.py -c confs/generic.yml -l logs/generic.log -m refresh_photos diff --git a/TODO.md b/TODO.md index b08fa59..697974b 100644 --- a/TODO.md +++ b/TODO.md @@ -18,7 +18,7 @@ See the 
[issues](https://github.com/drkostas/youbot/issues) too. - [X] Different YT env vars for each yml - [X] Option to set username manually - [X] Test that everything works properly -- [ ] Configure Procfile and circleci config +- [X] Configure Procfile and circleci config - [ ] Update Readme - [ ] Recreate the Livestreaming module - [ ] Use multiple accounts (different api keys) to check for new comments From 3b57f29fc902c6ca2ca7404d905c9e3da63078e1 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 29 May 2022 17:38:50 -0400 Subject: [PATCH 31/33] Probably everything is done #4 --- README.md | 354 +++++++++++++++++++++------------------------------ TODO.md | 4 +- img/snek.png | 0 3 files changed, 145 insertions(+), 213 deletions(-) delete mode 100644 img/snek.png diff --git a/README.md b/README.md index 01817b5..737fa0a 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,23 @@ -# YoutubeCommentBot +# Youtube Comment Bot -[![CircleCI](https://circleci.com/gh/drkostas/youbot/tree/master.svg?style=svg)](https://circleci.com/gh/drkostas/youbot/tree/master) -[![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/drkostas/youbot/master/LICENSE) +[![CircleCI](https://circleci.com/gh/drkostas/Youtube-FirstCommentBot/tree/master.svg?style=svg)](https://circleci.com/gh/drkostas/Youtube-FirstCommentBot/tree/master) +[![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/drkostas/Youtube-FirstCommentBot/master/LICENSE) +Buy Me A Coffee ## Table of Contents + [About](#about) + [Getting Started](#getting_started) + [Prerequisites](#prerequisites) -+ [Installing, Testing, Building](#installing) - + [Available Make Commands](#check_make_commamnds) - + [Clean Previous Builds](#clean_previous) - + [Create a new virtual environment](#create_env) - + [Build Locally (and install requirements)](#build_locally) - + [Run the tests](#tests) -+ [Running locally](#run_locally) - + 
[Configuration](#configuration) - + [Environment Variables](#env_variables) - + [Execution Options](#execution_options) - + [YoutubeCommentBot Main](#youbot_main) - + [YoutubeCommentBot Greet CLI](#youbot_cli) -+ [Deployment](#deployment) ++ [Build and prepare the project](#build_prepare) + + [Install the requirements](#install) + + [Create the config files](#configs) + + [Specify the pool of comments](#comments_pool) + + [Start following channels](#add_channels) ++ [Run the Bot](#commenter) ++ [Gathering statistics about the comments](#accumulator) ++ [Using Dropbox](#dropbox) ++ [Deployment on Heroku](#heroku) + [Continuous Ιntegration](#ci) + [Todo](#todo) + [Built With](#built_with) @@ -29,7 +26,21 @@ ## About -A bot that takes a list of youtube channels and posts the first comment in every new video. +A bot that leaves the first comment on every new video of specified channels. + +DISCLAIMER: This project is built for educational purposes. DO NOT use it to create spam-bots. + +Current modules: + +- Commenter: Looks for new videos indefinitely and leaves a comment as soon as something is posted +- Accumulator: Goes through all the comments posted and populates the `comments` table in the DB with + metadata such as the likes and replies count +- List Channels: It lists the Channels that are currently followed by the bot +- List Comments: It lists all the Comments posted by the bot +- Add Channel: It adds a new channel to the following list +- Set Priority: It set the comment priority of a specified channel +- Refresh Photo: It gathers and populates the `channels` table in the DB with urls to the Channels' + profile photos ## Getting Started @@ -39,277 +50,198 @@ system. ### Prerequisites -You need to have a machine with Python > 3.6 and any Bash based shell (e.g. zsh) installed. +You need to have a machine with Python >= 3.8 and any Bash-like shell (e.g. zsh) installed. 
```ShellSession $ python3.8 -V -Python 3.8.5 +Python 3.8 $ echo $SHELL /usr/bin/zsh ``` -If you want to usy any of the Gmail, Dropbox, Mysql packages you should set up some of: -- Gmail: An application-specific password for your Google account. -[Reference 1](https://support.google.com/mail/?p=InvalidSecondFactor), -[Reference 2](https://security.google.com/settings/security/apppasswords) -- Dropbox: An Api key for your Dropbox account. -[Reference 1](http://99rabbits.com/get-dropbox-access-token/), -[Reference 2](https://dropbox.tech/developers/generate-an-access-token-for-your-own-account) -- MySql: If you haven't any, you can create a free one on Amazon RDS. -[Reference 1](https://aws.amazon.com/rds/free/), -[Reference 2](https://bigdataenthusiast.wordpress.com/2016/03/05/aws-rds-instance-setup-oracle-db-on-cloud-free-tier/) +This project requires a MySQL database and a YouTube Api key. Optionally, you can also set up a Dropbox +Api key which is very useful for when you use Heroku to deploy the bot. -## Installing, Testing, Building +References: -All the installation steps are being handled by the [Makefile](Makefile). The `server=local` flag -basically specifies that you want to use conda instead of venv, and it can be changed easily in the -lines `#25-28`. `local` is also the default flag, so you can omit it. +- YouTube: Use the Google API Console to create OAuth 2.0 credentials: + + Visit the [developer console](https://console.cloud.google.com/apis/dashboard) + + Create a new project + + Open the [API Manager](https://console.developers.google.com/apis/) + + Enable YouTube Data API v3 + + Go to [Credentials](https://console.cloud.google.com/apis/credentials) + + Configure the OAuth consent screen and create OAuth client ID credentials + + Use Application Type Other and provide a client name (e.g. 
YoutubeBot) + + Confirm and download the generated credentials as JSON file + + Store the file in the `keys` folder 3 times as `keys/generic.json`, `keys/commenter.json` + , `keys/accumulator.json` +- MySQL: If you don't ha DB already, you can create one for free with Amazon RDS: + [Reference 1](https://aws.amazon.com/rds/free/), + [Reference 2](https://bigdataenthusiast.wordpress.com/2016/03/05/aws-rds-instance-setup-oracle-db-on-cloud-free-tier/) +- Dropbox: How to set up an Api key for your Dropbox account: + [Reference 1](http://99rabbits.com/get-dropbox-access-token/), + [Reference 2](https://dropbox.tech/developers/generate-an-access-token-for-your-own-account) -If you don't want to go through the detailed setup steps but finish the installation and run the -tests quickly, execute the following command: +## Build and prepare the project -```ShellSession -$ make install server=local -``` +This section will go through the installation steps, setting up the configuration files and comments, +and preparing the DB tables. -If you executed the previous command, you can skip through to -the [Running locally section](#run_locally). +### Install the requirements -### Check the available make commands +All the installation steps are being handled by the [Makefile](Makefile). By default, it uses `conda` +environments. If you want to use `virtualenv` instead, append to every `make` command the flag: +`env=venv`. If you want to modify the name of the environment or use another python version, modify the +first lines of the [Makefile](Makefile). 
+ +Deactivate and active conda environment, install the requirements and load the newly created +environment: ```ShellSession +$ conda deactivate +$ make install +$ conda activate youbot +``` -$ make help ------------------------------------------------------------------------------------------------------------ - DISPLAYING HELP ------------------------------------------------------------------------------------------------------------ -Use make [server=] to specify the server -Prod, and local are using conda env, circleci uses virtualenv. Default: local - -make help - Display this message -make install [server=] - Call clean delete_conda_env create_conda_env setup run_tests -make clean [server=] - Delete all './build ./dist ./*.pyc ./*.tgz ./*.egg-info' files -make delete_env [server=] - Delete the current conda env or virtualenv -make create_env [server=] - Create a new conda env or virtualenv for the specified python version -make setup [server=] - Call setup.py install -make run_tests [server=] - Run all the tests from the specified folder ------------------------------------------------------------------------------------------------------------ +### Create the config files -``` +The project uses YML config file along with command line arguments. There are three configs I am using: -### Clean any previous builds +- [generic.yml](confs/generic.yml): Used for running the following commands: + - list_channels + - list_comments + - add_channel + - refresh_photos + - set_priority +- [commenter.yml](confs/commenter.yml): Used to run the `commenter` command +- [accumulator.yml](confs/accumulator.yml): Used to run the `accumulator` command -```ShellSession -$ make clean delete_env server=local -``` +I am not going to go into depth for each available setting because you can use the three yml files as +templates. The only thing that should be mentioned is that I am using environmental variables to set +most of the values. For example: `db_name: !ENV ${MYSQL_DB_NAME}`. 
You can replace +the `!ENV ${MYSQL_DB_NAME}` +part with the actual value, for example: `db_name: My_Database`. For more details on how to use env +variables check [these instructions](https://pypi.org/project/yaml-config-wrapper/). -### Create a new virtual environment +### Specify the pool of comments -For creating a conda virtual environment run: +Now, you don't want the bot to post the same comment over and over again. For that reason, I am using a +pool of available comments, and bot automatically picks one that hasn't been commented to the +respective channel yet, otherwise it picks the one that was posted the longest time ago. Just create +a `default.txt` file in a folder named `comments` and write one comment per line. If, for a specific +channel, you want to have additional comments, create another txt file named after the channel's id. +For example you can create a `UC-ImLFXGIe2FC4Wo5hOodnw.txt` for the Veritasium YT channel. -```ShellSession -$ make create_env server=local -``` +### Start following channels -### Build Locally (and install requirements) +We are now ready to add YT channels to our following list (stored in the DB). After ensuring you are in +the conda environment, use the following command to add channels: -To build the project locally using the setup.py install command (which also installs the requirements), -execute the following command: +Using the channel ID ```ShellSession -$ make setup server=local +$ python youbot/run.py -c confs/generic.yml -l logs/generic.log -m add_channel -i ``` -### Run the tests - -The tests are located in the `tests` folder. 
To run all of them, execute the following command: +Using the channel username (Fails most of the time) ```ShellSession -$ make run_tests server=local +$ python youbot/run.py -c confs/generic.yml -l logs/generic.log -m add_channel -u ``` -## Running the code locally - -In order to run the code, you will only need to change the yml file if you need to, and either run its -file directly or invoke its console script. - -If you don't need to change yml file, skip to [Execution Options](#execution_options). - -### Modifying the Configuration - -There is two already configured yml files under [confs/template_conf.yml](confs/template_conf.yml) with -the following structure: - -```yaml -tag: production -cloudstore: - config: - api_key: !ENV ${DROPBOX_API_KEY} - type: dropbox -datastore: - config: - hostname: !ENV ${MYSQL_HOST} - username: !ENV ${MYSQL_USERNAME} - password: !ENV ${MYSQL_PASSWORD} - db_name: !ENV ${MYSQL_DB_NAME} - port: 3306 - type: mysql -email_app: - config: - email_address: !ENV ${EMAIL_ADDRESS} - api_key: !ENV ${GMAIL_API_KEY} - type: gmail -``` - -The `!ENV` flag indicates that you are passing an environmental value to this attribute. You can change -the values/environmental var names as you wish. If a yaml variable name is changed/added/deleted, the -corresponding changes should be reflected on the [yml_schema.json](configuration/yml_schema.json) too -which validates it. - -### Set the required environment variables - -In order to run the [main.py](youbot/main.py) you will need to set the -environmental variables you are using in your configuration yml file. 
Example: +To view the followed channels run: ```ShellSession -$ export DROPBOX_API_KEY=123 -$ export MYSQL_HOST=foo.rds.amazonaws.com -$ export MYSQL_USERNAME=user -$ export MYSQL_PASSWORD=pass -$ export MYSQL_DB_NAME=Test_schema -$ export EMAIL_ADDRESS=Gmail Bot -$ export GMAIL_API_KEY=123 +$ python youbot/run.py -c confs/generic.yml -l logs/generic.log -m list_channels ``` -The best way to do that, is to create a .env file ([example](env_example)), and source it before -running the code. - -### Execution Options - -First, make sure you are in the correct virtual environment: +There is also the option to set the priorities of each channel. If 2 or more channels post videos at +the same time, the bot will leave comments first to the ones with the highest priority value. To do so +run the following: ```ShellSession -$ conda activate youbot - -$ which python -/home/drkostas/anaconda3/envs/youbot/bin/python - +$ python youbot/run.py -c confs/generic.yml -l logs/generic.log -m set_priority --priority -i ``` -#### YoutubeCommentBot Main - -Now, in order to run the code you can either call the [main.py](youbot/main.py) -directly, or invoke the `youbot_main` -console script. +After you're done, you can optionally populate the table with each channel's profile picture: ```ShellSession -$ python youbot/main.py --help -usage: main.py -c CONFIG_FILE [-m {run_mode_1,run_mode_2,run_mode_3}] [-l LOG] [-d] [-h] - -A bot that takes a list of youtube channels and posts the first comment in every new video. 
- -Required Arguments: - -c CONFIG_FILE, --config-file CONFIG_FILE - The configuration yml file - -Optional Arguments: - -m {run_mode_1,run_mode_2,run_mode_3}, --run-mode {run_mode_1,run_mode_2,run_mode_3} - Description of the run modes - -l LOG, --log LOG Name of the output log file - -d, --debug Enables the debug log messages - -h, --help Show this help message and exit +$ python youbot/run.py -c confs/generic.yml -l logs/generic.log -m refresh_photos +``` +## Run the Bot -# Or +Now we are ready to run the commenter module of the bot. Assuming you set up the channels, created the +configuration, and you have the comments ready, run the following command: -$ youbot_main --help -usage: main.py -c CONFIG_FILE [-m {run_mode_1,run_mode_2,run_mode_3}] [-l LOG] [-d] [-h] +```ShellSession +python youbot/run.py -c confs/commenter.yml -l logs/commenter.log -m commenter +``` -A bot that takes a list of youtube channels and posts the first comment in every new video. +The bot will then run indefinitely until you stop it. -Required Arguments: - -c CONFIG_FILE, --config-file CONFIG_FILE - The configuration yml file +You can view all the comments posted at any point with the following command: -Optional Arguments: - -m {run_mode_1,run_mode_2,run_mode_3}, --run-mode {run_mode_1,run_mode_2,run_mode_3} - Description of the run modes - -l LOG, --log LOG Name of the output log file - -d, --debug Enables the debug log messages - -h, --help Show this help message and exit +```ShellSession +python youbot/run.py -c confs/generic.yml -l logs/generic.log -m list_comments --n-recent 10 ``` -#### YoutubeCommentBot CLI +## Gathering statistics about the comments -There is also a [cli.py](youbot/cli.py) which you can also invoke it by its -console script too -(`cli`). +Now that the bot is running, you probably want to gather statistics about the comments such as the +number of likes and replies. 
There is another bot for that job, that also runs indefinitely and +constantly updates the data in the `comments` table. To start it run the following command: ```ShellSession -$ cli --help -Usage: cli [OPTIONS] COMMAND [ARGS]... +python youbot/run.py -c confs/accumulator.yml -l logs/accumulator.log -m accumulator +``` -Options: - --install-completion [bash|zsh|fish|powershell|pwsh] - Install completion for the specified shell. - --show-completion [bash|zsh|fish|powershell|pwsh] - Show completion for the specified shell, to - copy it or customize the installation. +## Using Dropbox - --help Show this message and exit. +There is the option to also incorporate dropbox in the whole pipeline. Assuming you already created an +Api key and added a cloudstore section in the config, you can use the following options: -Commands: - bye - hello -``` +- `load_keys_from_cloud: true` (under youtube config): If set to true, the bot will automatically copy + the json keys from the defined `keys_folder_path` setting (in cloudstore config) to the defined + `keys` setting (in youtube config). This is very useful if you deploy the bot to heroku which is + stateless and any newly created file can be deleted anytime. So you may have to manually recreate the + keys. +- `upload_logs_every: 15` (under cloudstore config): If you configured the cloudstore config for the + commenter, then the bot will automatically copy the log file to the cloudstore `logs_folder_path` + every 15 `While: True` loops in the commenter function. Again, very useful for keeping the logs while + running on Heroku. +- `comments: type: dropbox`: If you set the type of the `comments` setting as `dropbox` then the + commenter will download the comment txt files from `dropbox_folder_name` into `local_folder_name` + before every `While: True` loop in the commenter. Useful for modifying the comments when running on + Heroku. -## Deployment +## Deployment on Heroku The deployment is being done to Heroku. 
For more information you can check the [setup guide](https://devcenter.heroku.com/articles/getting-started-with-python). -Make sure you check the -defined [Procfile](Procfile) ([reference](https://devcenter.heroku.com/articles/getting-started-with-python#define-a-procfile)) -and that you set -the [above-mentioned environmental variables](#env_variables) ([reference](https://devcenter.heroku.com/articles/config-vars)) -. +Make sure you check the defined [Procfile](Procfile) +([reference](https://devcenter.heroku.com/articles/getting-started-with-python#define-a-procfile)) +and that you set the appropriate environmental variables +([reference](https://devcenter.heroku.com/articles/config-vars)). ## Continuous Integration For the continuous integration, the CircleCI service is being used. For more information you can check the [setup guide](https://circleci.com/docs/2.0/language-python/). -Again, you should set -the [above-mentioned environmental variables](#env_variables) ([reference](https://circleci.com/docs/2.0/env-vars/#setting-an-environment-variable-in-a-context)) +Again, you should set the appropriate environmental variables +([reference](https://circleci.com/docs/2.0/env-vars/#setting-an-environment-variable-in-a-context)) and for any modifications, edit the [circleci config](/.circleci/config.yml). ## TODO Read the [TODO](TODO.md) to see the current task list. -## Built With - -* [Dropbox Python API](https://www.dropbox.com/developers/documentation/python) - Used for the Cloudstore Class -* [Gmail Sender](https://github.com/paulc/gmail-sender) - Used for the EmailApp Class -* [Heroku](https://www.heroku.com) - The deployment environment -* [CircleCI](https://www.circleci.com/) - Continuous Integration service - ## License This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
-## Acknowledgments - -* Thanks to PurpleBooth for - the [README template](https://gist.github.com/PurpleBooth/109311bb0361f32d87a2) - diff --git a/TODO.md b/TODO.md index 697974b..53b5f3e 100644 --- a/TODO.md +++ b/TODO.md @@ -19,8 +19,8 @@ See the [issues](https://github.com/drkostas/youbot/issues) too. - [X] Option to set username manually - [X] Test that everything works properly - [X] Configure Procfile and circleci config -- [ ] Update Readme -- [ ] Recreate the Livestreaming module +- [X] Update Readme - [ ] Use multiple accounts (different api keys) to check for new comments - [ ] Improve the YouTube api functions used (Activities api func - https://developers.google.com/youtube/v3/docs/activities/list) - [ ] Send me email on fatal error (on later version) +- [ ] Recreate the Livestreaming module diff --git a/img/snek.png b/img/snek.png deleted file mode 100644 index e69de29..0000000 From 85af6db71b8fef86df48f50517ff87ff33524144 Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 29 May 2022 17:40:10 -0400 Subject: [PATCH 32/33] Update .circleci/config --- .circleci/config.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 257ac44..da70d40 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -8,8 +8,4 @@ jobs: # A basic unit of work in a run - image: circleci/python:3.8 steps: # steps that comprise the `build` job - checkout # check out source code to working directory - - run: make clean env=venv - - run: make create_env env=venv - - run: make requirements env=venv - - run: make setup env=venv - - run: make run_tests env=venv + - run: make install env=venv From a45344a9edf65b925c2178086daa4381db4a1eff Mon Sep 17 00:00:00 2001 From: drkostas Date: Sun, 29 May 2022 17:47:30 -0400 Subject: [PATCH 33/33] Readme is done --- README.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 
737fa0a..7cee761 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ A bot that leaves the first comment on every new video of specified channels. -DISCLAIMER: This project is built for educational purposes. DO NOT use it to create spam-bots. +DISCLAIMER: This project is built for educational purposes. DO NOT use it to create spam-bots. Current modules: @@ -39,7 +39,7 @@ Current modules: - List Comments: It lists all the Comments posted by the bot - Add Channel: It adds a new channel to the following list - Set Priority: It set the comment priority of a specified channel -- Refresh Photo: It gathers and populates the `channels` table in the DB with urls to the Channels' +- Refresh Photo: It gathers and populates the `channels` table in the DB with URLs to the Channels' profile photos ## Getting Started @@ -62,8 +62,8 @@ $ echo $SHELL ``` -This project requires a MySQL database and a YouTube Api key. Optionally, you can also set up a Dropbox -Api key which is very useful for when you use Heroku to deploy the bot. +This project requires a MySQL database and a YouTube API key. Optionally, you can also set up a Dropbox +API key which is very useful when you use Heroku to deploy the bot. References: @@ -81,7 +81,7 @@ References: - MySQL: If you don't ha DB already, you can create one for free with Amazon RDS: [Reference 1](https://aws.amazon.com/rds/free/), [Reference 2](https://bigdataenthusiast.wordpress.com/2016/03/05/aws-rds-instance-setup-oracle-db-on-cloud-free-tier/) -- Dropbox: How to set up an Api key for your Dropbox account: +- Dropbox: How to set up an API key for your Dropbox account: [Reference 1](http://99rabbits.com/get-dropbox-access-token/), [Reference 2](https://dropbox.tech/developers/generate-an-access-token-for-your-own-account) @@ -92,12 +92,12 @@ and preparing the DB tables. ### Install the requirements -All the installation steps are being handled by the [Makefile](Makefile). 
By default, it uses `conda` +All the installation steps are handled by the [Makefile](Makefile). By default, it uses `conda` environments. If you want to use `virtualenv` instead, append to every `make` command the flag: `env=venv`. If you want to modify the name of the environment or use another python version, modify the first lines of the [Makefile](Makefile). -Deactivate and active conda environment, install the requirements and load the newly created +Deactivate any active Conda environment, install the requirements and load the newly created environment: ```ShellSession @@ -108,7 +108,7 @@ $ conda activate youbot ### Create the config files -The project uses YML config file along with command line arguments. There are three configs I am using: +The project uses YML config files along with command-line arguments. There are three configs I am using: - [generic.yml](confs/generic.yml): Used for running the following commands: - list_channels @@ -119,7 +119,7 @@ The project uses YML config file along with command line arguments. There are th - [commenter.yml](confs/commenter.yml): Used to run the `commenter` command - [accumulator.yml](confs/accumulator.yml): Used to run the `accumulator` command -I am not going to go into depth for each available setting because you can use the three yml files as +I am not going to go into depth for each available setting because you can use the three YML files as templates. The only thing that should be mentioned is that I am using environmental variables to set most of the values. For example: `db_name: !ENV ${MYSQL_DB_NAME}`. You can replace the `!ENV ${MYSQL_DB_NAME}` @@ -129,16 +129,16 @@ variables check [these instructions](https://pypi.org/project/yaml-config-wrappe ### Specify the pool of comments Now, you don't want the bot to post the same comment over and over again.
For that reason, I am using a -pool of available comments, and bot automatically picks one that hasn't been commented to the -respective channel yet, otherwise it picks the one that was posted the longest time ago. Just create +pool of available comments, and the bot automatically picks one that hasn't been posted to the +respective channel yet; otherwise, it picks the one that was posted the longest time ago. Just create a `default.txt` file in a folder named `comments` and write one comment per line. If, for a specific channel, you want to have additional comments, create another txt file named after the channel's id. -For example you can create a `UC-ImLFXGIe2FC4Wo5hOodnw.txt` for the Veritasium YT channel. +For example, you can create a `UC-ImLFXGIe2FC4Wo5hOodnw.txt` for the Veritasium YT channel. ### Start following channels We are now ready to add YT channels to our following list (stored in the DB). After ensuring you are in -the conda environment, use the following command to add channels: +the Conda environment, use the following command to add channels: Using the channel ID @@ -202,11 +202,11 @@ python youbot/run.py -c confs/accumulator.yml -l logs/accumulator.log -m accumul ## Using Dropbox There is the option to also incorporate dropbox in the whole pipeline. Assuming you already created an -Api key and added a cloudstore section in the config, you can use the following options: +API key and added a cloudstore section in the config, you can use the following options: - `load_keys_from_cloud: true` (under youtube config): If set to true, the bot will automatically copy - the json keys from the defined `keys_folder_path` setting (in cloudstore config) to the defined - `keys` setting (in youtube config). This is very useful if you deploy the bot to heroku which is + the JSON keys from the defined `keys_folder_path` setting (in cloudstore config) to the defined + `keys` setting (in youtube config).
This is very useful if you deploy the bot to Heroku which is stateless and any newly created file can be deleted anytime. So you may have to manually recreate the keys. - `upload_logs_every: 15` (under cloudstore config): If you configured the cloudstore config for the @@ -220,7 +220,7 @@ Api key and added a cloudstore section in the config, you can use the following ## Deployment on Heroku -The deployment is being done to Heroku. For more information you can check +The deployment is being done to Heroku. For more information, you can check the [setup guide](https://devcenter.heroku.com/articles/getting-started-with-python). Make sure you check the defined [Procfile](Procfile) @@ -230,7 +230,7 @@ and that you set the appropriate environmental variables ## Continuous Integration -For the continuous integration, the CircleCI service is being used. For more information you can +For the continuous integration, the CircleCI service is being used. For more information, you can check the [setup guide](https://circleci.com/docs/2.0/language-python/). Again, you should set the appropriate environmental variables