diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..bf6c524 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,23 @@ +name: CI +on: [push, pull_request] + +jobs: + test: + name: Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install stable toolchain + uses: dtolnay/rust-toolchain@stable + with: + components: clippy, rustfmt + + - name: cargo clippy + run: cargo clippy + + - name: cargo fmt + run: cargo fmt --check + + - name: cargo test + run: cargo test diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..73ec4a5 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,25 @@ +name: Release + +permissions: + pull-requests: write + contents: write + +on: + workflow_dispatch: + +jobs: + release-plz: + name: Release-plz + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + - name: Run release-plz + uses: MarcoIeni/release-plz-action@v0.5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..f5499a2 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,8 @@ +# [Code of Conduct][code-of-conduct] + +The Rust Foundation has adopted a Code of Conduct that we expect project +participants to adhere to. Please read +[the full text][code-of-conduct] +so that you can understand what actions will and will not be tolerated. 
+ +[code-of-conduct]: https://foundation.rust-lang.org/policies/code-of-conduct/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..9fdd7ec --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,34 @@ +# Contributing to a Rust Foundation Project + +Thank you for your interest in contributing to this Rust Foundation project. +We are happy and excited to review and accept your pull requests. + +## Before You Begin Contributing + +### Licenses + +There is no Contributor License Agreement to sign to contribute to this project. +Your contribution will be covered by the license(s) granted for this +repository, commonly MIT, Apache, and/or CC-BY, but could be a different +license. In other words, your contribution will be licensed to the Foundation +and all downstream users under those licenses. You can read more in the +Foundation's [intellectual property policy][ip-policy]. + +### Code of Conduct + +Please review and adhere to the [code of conduct](CODE_OF_CONDUCT.md) before +contributing any pull requests. + +## Contribution Process + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult [GitHub Help][pull-requests] +for more information on using pull requests. + +### Issues + +Do you just want to file an issue for the project? Please do so in GitHub under +the `Issues` tab. + +[ip-policy]: https://foundation.rust-lang.org/policies/intellectual-property-policy/ +[pull-requests]: https://help.github.com/articles/about-pull-requests/ diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..afbd9f2 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,14 @@ +This project is dual licensed under Apache 2.0 and MIT terms with the exception +of documentation (e.g., `.md` docs in a `/docs` folder) which is licensed under +Creative Commons Attribution 4.0 International. + +Copyrights in a Rust Foundation project are retained by their contributors. 
No +copyright assignment is required to contribute to the Rust project. + +While not the default, some files may include explicit copyright notices +and/or license notices. + +Except as otherwise noted (e.g., with documentation), Rust is licensed under the +Apache License, Version 2.0 (LICENSE-APACHE or +http://www.apache.org/licenses/LICENSE-2.0) or the MIT license (LICENSE-MIT or +http://opensource.org/licenses/MIT), at your option. diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..53e537c --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,427 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstream" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea" + +[[package]] +name = "anstyle-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflip" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7006e6395f41477e1052dfff8381b32c0d740c2ff3d48ffd72d19132293b4578" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824956d0dca8334758a5b7f7e50518d66ea319330cbceedcf76905c2f6ab30e3" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "122ec64120a49b4563ccaedcbea7818d069ed8e9aa6d829b82d8a4128936b2ab" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "libc" +version = "0.2.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + 
"autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "strsim" +version = "0.10.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "2.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "typomania" +version = "0.1.0" +dependencies = [ + "bitflip", + "clap", + "itertools", + "rayon", + "thiserror", + "tracing", +] + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = 
"windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..74de527 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "typomania" +version = "0.1.0" +edition = "2021" +description = " A toolbox to check for typosquatting in package registries" +keywords = ["typosquatting", "typosquat", "typogard"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/rustfoundation/typomania" +rust-version = "1.56.1" + +[dependencies] +bitflip = "0.1.0" +itertools = "0.11.0" +rayon = { version = "1.7.0", optional = true } +thiserror = "1.0.47" +tracing = "0.1.37" + +[features] +default = ["rayon"] +rayon = ["dep:rayon"] + +[dev-dependencies] +clap = { version = "4.4.5", features = ["derive"] } + +[[example]] +name = "registry" +path = "examples/registry.rs" +required-features = ["rayon"] diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..9cf1062 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,19 @@ +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/LICENSE-documentation b/LICENSE-documentation new file mode 100644 index 0000000..a6d7fd3 --- /dev/null +++ b/LICENSE-documentation @@ -0,0 +1,384 @@ +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. 
More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More_considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + +Section 1 -- Definitions. + + a. 
Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + +b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + +c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + +d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + +e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + +f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + +g. 
Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + +h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + +i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + +j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + +k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + +Section 2 -- Scope. + +a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. 
Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + +b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. 
Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + +a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. 
If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + +a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + +b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + +c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + +a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + +b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + +c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + +Section 6 -- Term and Termination. + +a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + +b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + +c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + +d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + +Section 7 -- Other Terms and Conditions. + +a. 
The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + +b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + +Section 8 -- Interpretation. + +a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + +b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + +c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + +d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + +======================================================================= + +Creative Commons is not a party to its public licenses. +Notwithstanding, Creative Commons may elect to apply one of its public +licenses to material it publishes and in those instances will be +considered the "Licensor." 
Except for the limited purpose of indicating +that material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the public +licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/README.md b/README.md new file mode 100644 index 0000000..4eda1da --- /dev/null +++ b/README.md @@ -0,0 +1,72 @@ +# typomania + +The typomania project is a port to Rust of the excellent [`typogard`][typogard], +originally by a team led by Matthew Taylor at the University of Kansas and +published alongside the [_Defending Against Package Typosquatting_][paper] +paper, and adapted by [Dan Gardner][dangardner] for crates.io specifically. + +Rather than being hard coded to a specific registry, this crate provides the +same set of primitives that `typogard` uses to detect potential typosquatting as +a reusable library that can be adapted to any registry by implementing the +traits provided in this crate. + +## Features + +* `rayon` (enabled by default): enables `Harness::check`, which provides + functionality to check many packages in parallel using Rayon. + +## Examples + +### Fake registry + +A basic example is provided in [`examples/registry.rs`](examples/registry.rs) +that fakes a registry and then matches packages against it. 
To see it operate +with some packages that generate potential typosquats, try: + +```bash +cargo run --example registry -- -t abc,foo,foo-2 foo2 abd +``` + +### crates.io + +An example project that uses this crate to analyse a crates.io database dump can +be found at [`typomania-crates`][typomania-crates]. + +## [Code of Conduct][code-of-conduct] + +The Rust Foundation has adopted a Code of Conduct that we expect project +participants to adhere to. Please read +[the full text][code-of-conduct] +so that you can understand what actions will and will not be tolerated. + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md). + +## Licenses + +This project is primarily distributed under the terms of both the MIT license and the +Apache License (Version 2.0), with documentation portions covered by the +Creative Commons Attribution 4.0 International license. + +See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT), +[LICENSE-documentation](LICENSE-documentation), and +[COPYRIGHT](COPYRIGHT) for details. + +You can also read more under the Foundation's [intellectual property +policy][ip-policy]. + +## Other Policies + +You can read about other Rust Foundation policies in the footer of the +Foundation [website][foundation-website]. 
+ +[code-of-conduct]: https://foundation.rust-lang.org/policies/code-of-conduct/ +[dangardner]: https://github.com/dangardner/typogard +[foundation-website]: https://foundation.rust-lang.org +[ip-policy]: https://foundation.rust-lang.org/policies/intellectual-property-policy/ +[media-guide and trademark]: https://foundation.rust-lang.org/policies/logo-policy-and-media-guide/ +[paper]: https://dl.acm.org/doi/10.1007/978-3-030-65745-1_7 +[rust-foundation]: https://foundation.rust-lang.org/ +[typogard]: https://github.com/mt3443/typogard +[typomania-crates]: https://github.com/rustfoundation/typomania-crates diff --git a/examples/registry.rs b/examples/registry.rs new file mode 100644 index 0000000..457a7e1 --- /dev/null +++ b/examples/registry.rs @@ -0,0 +1,169 @@ +use std::collections::{HashMap, HashSet}; + +use clap::Parser; +use typomania::{ + checks::{Bitflips, Omitted, SwappedWords, Typos}, + AuthorSet, Corpus, Harness, Package, +}; + +#[derive(Debug, Parser)] +struct Opt { + /// Valid characters in package names + #[arg( + long, + default_value = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890-_" + )] + alphabet: String, + + /// Package names to consider top (or popular) packages, delimited by commas + #[arg(short, long, value_name = "PACKAGE", value_delimiter = ',')] + top_packages: Vec, + + /// Packages to check against the top packages + #[arg(value_name = "PACKAGE")] + packages: Vec, +} + +fn main() -> typomania::Result<()> { + let opt = Opt::parse(); + + // Build a corpus of the top packages that we want to match against. + let corpus = TopPackages::from(opt.top_packages); + + // Build a harness that uses the checks built into typomania. 
+ let harness = Harness::builder() + .with_check(Bitflips::new( + &opt.alphabet, + corpus.0.keys().map(|s| s.as_str()), + )) + .with_check(Omitted::new(&opt.alphabet)) + .with_check(SwappedWords::new("-_.")) + .with_check(Typos::new(TYPOS.iter().map(|(c, typos)| { + (*c, typos.iter().map(|ss| ss.to_string()).collect()) + }))) + .build(corpus); + + // Actually check the given packages. + for (name, squats) in harness + .check(opt.packages.into_iter().map(|name| { + let package: Box = Box::new(FakePackage::new(&name)); + (name, package) + }))? + .into_iter() + { + println!("{name}: {squats:?}"); + } + + Ok(()) +} + +struct TopPackages(HashMap); + +impl From> for TopPackages { + fn from(value: Vec) -> Self { + Self( + value + .into_iter() + .map(|name| { + let package = FakePackage::new(&name); + (name, package) + }) + .collect(), + ) + } +} + +impl Corpus for TopPackages { + fn contains_name(&self, name: &str) -> typomania::Result { + Ok(self.0.contains_key(name)) + } + + fn get(&self, name: &str) -> typomania::Result> { + Ok(self + .0 + .get(name) + .map(|package| package as &dyn typomania::Package)) + } +} + +struct FakePackage { + authors: HashSet, + description: String, +} + +impl FakePackage { + fn new(name: &str) -> Self { + Self { + // We'll set up a fake author based on the name so that there's no possibility of + // having a match excluded because of a shared author. 
+ authors: [format!("{name} author <{name}@example.com>")] + .into_iter() + .collect(), + description: format!("{name} is a package that does {name}"), + } + } +} + +impl Package for FakePackage { + fn authors(&self) -> &dyn AuthorSet { + self + } + + fn description(&self) -> Option<&str> { + Some(&self.description) + } + + fn shared_authors(&self, other: &dyn AuthorSet) -> bool { + self.authors.iter().any(|author| other.contains(author)) + } +} + +impl AuthorSet for FakePackage { + fn contains(&self, author: &str) -> bool { + self.authors.contains(author) + } +} + +// This is based on a pre-existing list we've used with crates.io for "easily confused characters". +// (I'm not really sure that I consider all of these easily confused, but it's better than nothing.) +static TYPOS: &[(char, &[&str])] = &[ + ('1', &["2", "q", "i", "l"]), + ('2', &["1", "q", "w", "3"]), + ('3', &["2", "w", "e", "4"]), + ('4', &["3", "e", "r", "5"]), + ('5', &["4", "r", "t", "6", "s"]), + ('6', &["5", "t", "y", "7"]), + ('7', &["6", "y", "u", "8"]), + ('8', &["7", "u", "i", "9"]), + ('9', &["8", "i", "o", "0"]), + ('0', &["9", "o", "p", "-"]), + ('-', &["_", "0", "p", ".", ""]), + ('_', &["-", "0", "p", ".", ""]), + ('q', &["1", "2", "w", "a"]), + ('w', &["2", "3", "e", "s", "a", "q", "vv"]), + ('e', &["3", "4", "r", "d", "s", "w"]), + ('r', &["4", "5", "t", "f", "d", "e"]), + ('t', &["5", "6", "y", "g", "f", "r"]), + ('y', &["6", "7", "u", "h", "t", "i"]), + ('u', &["7", "8", "i", "j", "y", "v"]), + ('i', &["1", "8", "9", "o", "l", "k", "j", "u", "y"]), + ('o', &["9", "0", "p", "l", "i"]), + ('p', &["0", "-", "o"]), + ('a', &["q", "w", "s", "z"]), + ('s', &["w", "d", "x", "z", "a", "5"]), + ('d', &["e", "r", "f", "c", "x", "s"]), + ('f', &["r", "g", "v", "c", "d"]), + ('g', &["t", "h", "b", "v", "f"]), + ('h', &["y", "j", "n", "b", "g"]), + ('j', &["u", "i", "k", "m", "n", "h"]), + ('k', &["i", "o", "l", "m", "j"]), + ('l', &["i", "o", "p", "k", "1"]), + ('z', &["a", "s", "x"]), + ('x', 
&["z", "s", "d", "c"]), + ('c', &["x", "d", "f", "v"]), + ('v', &["c", "f", "g", "b", "u"]), + ('b', &["v", "g", "h", "n"]), + ('n', &["b", "h", "j", "m"]), + ('m', &["n", "j", "k", "rn"]), + ('.', &["-", "_", ""]), +]; diff --git a/src/checks/bitflips.rs b/src/checks/bitflips.rs new file mode 100644 index 0000000..0fe9b5c --- /dev/null +++ b/src/checks/bitflips.rs @@ -0,0 +1,97 @@ +use std::collections::{BTreeMap, BTreeSet}; + +use thiserror::Error; +use tracing::instrument; + +use crate::{BoxError, Corpus, Package}; + +use super::{Check, Squat}; + +/// Checks whether the package is a bitflipped version of a package in the corpus. +/// +/// This attempts to detect [bitsquatting attacks][bitsquatting]. +/// +/// [bitsquatting]: https://en.wikipedia.org/wiki/Bitsquatting +pub struct Bitflips { + bitflips: BTreeMap>, + names: Vec, +} + +impl Bitflips { + /// Instantiates a bitflip check. + /// + /// `alphabet` is the list of characters that are valid in a package name. + /// + /// `names` is generally the same set of names that exist in the top package corpus: a local + /// copy is required so that the list of possible bitflips can be generated during + /// instantiation, rather than having to recalculate the list each time the check is run. 
+ #[instrument(level = "TRACE", skip(names))] + pub fn new<'a>(alphabet: &str, names: impl Iterator) -> Self { + let alphabet: BTreeSet = alphabet.chars().collect(); + let mut bitflips: BTreeMap> = BTreeMap::new(); + let mut cloned_names = Vec::new(); + + for (i, name) in names.enumerate() { + cloned_names.push(name.into()); + for bitflipped_name in + bitflip::ascii_str(name).filter(|bf| bf.chars().all(|c| alphabet.contains(&c))) + { + bitflips.entry(bitflipped_name).or_default().push(i); + } + } + + Self { + bitflips, + names: cloned_names, + } + } +} + +impl Check for Bitflips { + fn check( + &self, + corpus: &dyn Corpus, + name: &str, + package: &dyn Package, + ) -> crate::Result> { + let mut squats = Vec::new(); + + if let Some(indices) = self.bitflips.get(name) { + for index in indices.iter().copied() { + let name_to_check = self.names.get(index).ok_or(Error::OutOfRangeIndex { + index, + len: self.names.len(), + })?; + if corpus.possible_squat(name_to_check, name, package)? { + squats.push(Squat::Bitflip(name_to_check.into())) + } + } + } + + Ok(squats) + } +} + +#[derive(Error, Debug)] +enum Error { + #[error(transparent)] + Corpus(#[from] BoxError), + + #[error("unexpected out of range index {index} in vec of length {len}")] + OutOfRangeIndex { index: usize, len: usize }, +} + +#[cfg(test)] +mod tests { + use crate::checks::testutil::assert_check; + + use super::*; + + #[test] + fn test_bitflips() -> crate::Result<()> { + assert_check(Bitflips::new("abcdef", ["ab"].into_iter()), "ac", &["ab"])?; + + // Even more limited alphabet. + assert_check(Bitflips::new("ab", ["ab"].into_iter()), "ac", &[]) + } +} diff --git a/src/checks/mod.rs b/src/checks/mod.rs new file mode 100644 index 0000000..11c6802 --- /dev/null +++ b/src/checks/mod.rs @@ -0,0 +1,140 @@ +//! Checks provided by typomania, along with the traits and types required to define custom checks. +//! +//! To implement a custom check, implement the [`Check`] trait, and have it return one or more +//! 
[`Squat`]s when the package may be squatting one or more packages in the corpus. + +use std::fmt::Display; + +use crate::{Corpus, Package}; + +mod bitflips; +mod omitted; +mod repeated; +mod swapped; +mod typos; +mod util; +mod version; + +#[cfg(test)] +mod testutil; + +pub use bitflips::Bitflips; +pub use omitted::Omitted; +pub use repeated::Repeated; +pub use swapped::{Characters as SwappedCharacters, Words as SwappedWords}; +pub use typos::Typos; +pub use version::Version; + +/// A check that compares the given package to the existing corpus. +pub trait Check: Sync + Send { + fn check( + &self, + corpus: &dyn Corpus, + name: &str, + package: &dyn Package, + ) -> crate::Result>; +} + +/// A potential typosquat. +#[derive(Debug, Clone)] +pub enum Squat { + Bitflip(String), + OmittedCharacter(String), + RepeatedCharacter(String), + SwappedCharacters(String), + SwappedWords(String), + Typo(String), + Version(String), + Custom { message: String, package: String }, +} + +impl Squat { + /// Instantiate a custom squat. 
+ pub fn custom(message: &str, package: &str) -> Self { + Self::Custom { + message: message.into(), + package: package.into(), + } + } +} + +impl Display for Squat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Squat::Bitflip(package) => write!(f, "may be a bitflip of {package}"), + Squat::OmittedCharacter(package) => write!(f, "omits characters in {package}"), + Squat::RepeatedCharacter(package) => write!(f, "repeats characters in {package}"), + Squat::SwappedCharacters(package) => write!(f, "swaps characters in {package}"), + Squat::SwappedWords(package) => write!(f, "swaps words in {package}"), + Squat::Typo(package) => write!(f, "uses a common typo for {package}"), + Squat::Version(package) => write!(f, "only changes the version from {package}"), + Squat::Custom { message, package } => write!(f, "{message} for {package}"), + } + } +} + +#[cfg(test)] +mod test { + use std::collections::HashMap; + + use super::{testutil::TestPackage, *}; + + struct SimpleCorpus(HashMap); + + impl Corpus for SimpleCorpus { + fn contains_name(&self, name: &str) -> crate::Result { + Ok(self.0.contains_key(name)) + } + + fn get(&self, name: &str) -> crate::Result> { + Ok(if let Some(package) = self.0.get(name) { + Some(package) + } else { + None + }) + } + } + + #[test] + fn test_possible_squat() -> crate::Result<()> { + let corpus = SimpleCorpus( + [ + ("a", TestPackage::new("adam")), + ("d", TestPackage::default()), + ] + .into_iter() + .map(|(name, package)| (String::from(name), package)) + .collect(), + ); + + #[allow(clippy::bool_assert_comparison)] + { + // Not a possible squat: same package. + assert_eq!( + corpus.possible_squat("a", "a", &TestPackage::default())?, + false + ); + + // Possible squat: no authors in common. (Even though neither package actually has any + // authors at all.) + assert_eq!( + corpus.possible_squat("d", "x", &TestPackage::default())?, + true + ); + + // Not a possible squat: author "adam" in common. 
+ assert_eq!( + corpus.possible_squat("a", "x", &TestPackage::new("adam"))?, + false + ); + + // Possible squat: no authors in common. + assert_eq!( + corpus.possible_squat("a", "x", &TestPackage::default())?, + true + ); + } + + Ok(()) + } +} diff --git a/src/checks/omitted.rs b/src/checks/omitted.rs new file mode 100644 index 0000000..757b3e4 --- /dev/null +++ b/src/checks/omitted.rs @@ -0,0 +1,60 @@ +use crate::Corpus; + +use super::{util, Check, Package, Squat}; + +/// Checks whether a package only differs from a package in the corpus by omitting one character. +pub struct Omitted { + alphabet: Vec, +} + +impl Omitted { + /// Instantiates an omitted character check. + /// + /// `alphabet` is the list of characters that are valid in a package name. + pub fn new(alphabet: &str) -> Self { + Self { + alphabet: alphabet.chars().map(String::from).collect(), + } + } +} + +impl Check for Omitted { + fn check( + &self, + corpus: &dyn Corpus, + name: &str, + package: &dyn Package, + ) -> crate::Result> { + let mut squats = Vec::new(); + + for i in 0..=name.len() { + for c in self.alphabet.iter() { + let name_to_check = util::rebuild_name(name, i, 0, c); + if corpus.possible_squat(&name_to_check, name, package)? { + squats.push(Squat::OmittedCharacter(name_to_check)); + } + } + } + + Ok(squats) + } +} + +#[cfg(test)] +mod tests { + use crate::checks::testutil::assert_check; + + use super::*; + + #[test] + fn test_omitted() -> crate::Result<()> { + assert_check( + Omitted::new("abc"), + "xyz", + &[ + "axyz", "bxyz", "cxyz", "xayz", "xbyz", "xcyz", "xyaz", "xybz", "xycz", "xyza", + "xyzb", "xyzc", + ], + ) + } +} diff --git a/src/checks/repeated.rs b/src/checks/repeated.rs new file mode 100644 index 0000000..aa9eae2 --- /dev/null +++ b/src/checks/repeated.rs @@ -0,0 +1,56 @@ +use itertools::Itertools; + +use crate::Corpus; + +use super::{util, Check, Package, Squat}; + +/// Checks whether a package only differs from a package in the corpus by repeating one character. 
+pub struct Repeated; + +impl Check for Repeated { + fn check( + &self, + corpus: &dyn Corpus, + name: &str, + package: &dyn Package, + ) -> crate::Result> { + let mut squats = Vec::new(); + + for (i, (a, b)) in name.chars().tuple_windows().enumerate() { + if a == b && a.is_ascii() { + let name_to_check = util::rebuild_name(name, i, 2, &format!("{a}")); + if corpus.possible_squat(&name_to_check, name, package)? { + squats.push(Squat::RepeatedCharacter(name_to_check)); + } + } + } + + Ok(squats) + } +} + +#[cfg(test)] +mod tests { + use crate::checks::testutil::assert_check; + + use super::*; + + #[test] + fn test_repeated() -> crate::Result<()> { + #[track_caller] + fn test(input: &str, want: &[&str]) -> crate::Result<()> { + assert_check(Repeated, input, want) + } + + test("", &[])?; + test("a", &[])?; + test("aa", &["a"])?; + test("abc", &[])?; + test("abbc", &["abc"])?; + test("abbbc", &["abbc"])?; + test("abbbbc", &["abbbc"])?; + test("aaaaaa", &["aaaaa"])?; + + Ok(()) + } +} diff --git a/src/checks/swapped.rs b/src/checks/swapped.rs new file mode 100644 index 0000000..a900832 --- /dev/null +++ b/src/checks/swapped.rs @@ -0,0 +1,130 @@ +use itertools::Itertools; + +use super::{util, Check, Corpus, Package, Squat}; + +/// Checks whether one or more characters have been swapped in the given package name. +pub struct Characters; + +impl Check for Characters { + fn check( + &self, + corpus: &dyn Corpus, + name: &str, + package: &dyn Package, + ) -> crate::Result> { + let mut squats = Vec::new(); + + for (i, (a, b)) in name.chars().tuple_windows().enumerate() { + if a != b { + let name_to_check = util::rebuild_name(name, i, 2, &format!("{b}{a}")); + if corpus.possible_squat(&name_to_check, name, package)? { + squats.push(Squat::SwappedCharacters(name_to_check)); + } + } + } + + Ok(squats) + } +} + +/// Checks whether one or more words have been swapped in the given package name. 
+pub struct Words { + delimiters: Vec, +} + +impl Words { + /// Sets up a swapped word check, using each character in `delimiters` as a possible word + /// delimiter. + pub fn new(delimiters: &str) -> Self { + Self { + delimiters: delimiters.chars().collect(), + } + } +} + +impl Check for Words { + fn check( + &self, + corpus: &dyn Corpus, + name: &str, + package: &dyn Package, + ) -> crate::Result> { + let mut squats = Vec::new(); + + let tokens: Vec = name + .split(self.delimiters.as_slice()) + .map(String::from) + .collect(); + + // Short circuit if there's still only one token. + let num_tokens = tokens.len(); + if num_tokens == 1 { + return Ok(squats); + } + + for case in tokens.into_iter().permutations(num_tokens) { + for delimiter in self.delimiters.iter() { + let name_to_check = case.join(&format!("{delimiter}")); + if corpus.possible_squat(&name_to_check, name, package)? { + squats.push(Squat::SwappedWords(name_to_check)); + } + } + } + + Ok(squats) + } +} + +#[cfg(test)] +mod tests { + use crate::checks::testutil::assert_check; + + use super::*; + + #[test] + fn test_characters() -> crate::Result<()> { + #[track_caller] + fn test(input: &str, want: &[&str]) -> crate::Result<()> { + assert_check(Characters, input, want) + } + + test("", &[])?; + test("a", &[])?; + test("ab", &["ba"])?; + test("abc", &["bac", "acb"])?; + + Ok(()) + } + + #[test] + fn test_words() -> crate::Result<()> { + #[track_caller] + fn test(input: &str, want: &[&str]) -> crate::Result<()> { + assert_check(Words::new("-_"), input, want) + } + + test("", &[])?; + test("a", &[])?; + test("abc", &[])?; + test("abc-def", &["abc_def", "def-abc", "def_abc"])?; + test( + "abc-def_ghi", + &[ + "abc_def_ghi", + "abc-def-ghi", + "abc_ghi_def", + "abc-ghi-def", + "def_abc_ghi", + "def-abc-ghi", + "def_ghi_abc", + "def-ghi-abc", + "ghi_abc_def", + "ghi-abc-def", + "ghi_def_abc", + "ghi-def-abc", + ], + )?; + + Ok(()) + } +} diff --git a/src/checks/testutil.rs b/src/checks/testutil.rs new file mode 
100644 index 0000000..637fb9a --- /dev/null +++ b/src/checks/testutil.rs @@ -0,0 +1,123 @@ +use std::{ + collections::{HashMap, HashSet}, + sync::RwLock, +}; + +use crate::AuthorSet; + +use super::{Check, Corpus, Package}; + +/// Minimal [`Package`] implementation for tests: a set of author names and an optional +/// description. +#[derive(Debug, Clone, Default)] +pub struct TestPackage { + pub authors: HashSet, + pub description: Option, +} + +impl TestPackage { + /// Creates a package whose only author is `author` and that has no description. + pub fn new(author: &str) -> Self { + Self { + authors: [String::from(author)].into_iter().collect(), + description: None, + } + } +} + +impl AuthorSet for TestPackage { + fn contains(&self, author: &str) -> bool { + self.authors.contains(author) + } +} + +impl Package for TestPackage { + fn authors(&self) -> &dyn AuthorSet { + self + } + + fn description(&self) -> Option<&str> { + self.description.as_deref() + } + + fn shared_authors(&self, other: &dyn AuthorSet) -> bool { + self.authors.iter().any(|author| other.contains(author)) + } +} + +/// Test corpus holding a single known package that records, in `seen`, every other name it is +/// queried for. +struct NameTracker { + known: HashMap, + seen: RwLock>, +} + +impl NameTracker { + /// Creates a tracker whose only known package is named `known` and is authored by an author + /// of the same name. + fn new(known: &str) -> Self { + Self { + known: [String::from(known)] + .into_iter() + .map(|name| { + let package = TestPackage::new(&name); + (name, package) + }) + .collect(), + seen: RwLock::new(HashMap::default()), + } + } + + /// Panics unless the set of recorded names is exactly `want`; order and duplicates in `want` + /// are ignored. + #[track_caller] + fn assert_contains_exactly(&self, want: &[&str]) { + let mut set = HashSet::new(); + for term in want { + set.insert(String::from(*term)); + } + + let seen: HashSet = self.seen.read().unwrap().keys().cloned().collect(); + + assert_eq!( + seen.symmetric_difference(&set) + .cloned() + .collect::>(), + Vec::::new(), + ); + } +} + +impl Corpus for NameTracker { + fn contains_name(&self, name: &str) -> crate::Result { + Ok(if self.known.contains_key(name) { + true + } else { + self.seen + .write() + .unwrap() + .entry(name.into()) + .or_insert_with(|| TestPackage::new(name)); + false + }) + } + + fn get(&self, name: &str) -> crate::Result> { + Ok(if let Some(package) = self.known.get(name) { + Some(package) + } else { + // By using the package name as 
the author, no two packages will ever match. + self.seen + .write() + .unwrap() + .entry(name.into()) + .or_insert_with(|| TestPackage::new(name)); + + None + }) + } +} + +#[track_caller] +pub(super) fn assert_check(check: C, input: &str, want: &[&str]) -> crate::Result<()> +where + C: Check, +{ + let names = NameTracker::new(input); + + check.check(&names, input, &TestPackage::new(input))?; + names.assert_contains_exactly(want); + + Ok(()) +} diff --git a/src/checks/typos.rs b/src/checks/typos.rs new file mode 100644 index 0000000..e661796 --- /dev/null +++ b/src/checks/typos.rs @@ -0,0 +1,76 @@ +use std::collections::HashMap; + +use super::{util, Check, Corpus, Package, Squat}; + +/// Checks for common typos. +/// +/// This is a very flexible check that — to some extent — duplicates functionality found in other +/// checks, but is also somewhat annoying to configure. You may not need this check in your +/// standard set, depending on your threat model. +pub struct Typos { + typos: HashMap>, +} + +impl Typos { + /// Instantiates a typo check. + /// + /// Each element in `typos` is used to rebuild the package name when checking. Each character + /// will be replaced by each string in the given vector. For example, if the only typo given is + /// `('a', vec!["bb", "x", ""])`, then a package `apkg` will also be checked against `bbpkg`, + /// `xpkg`, and `pkg`. + pub fn new(typos: impl Iterator)>) -> Self { + Self { + typos: typos.collect(), + } + } +} + +impl Check for Typos { + fn check( + &self, + corpus: &dyn Corpus, + name: &str, + package: &dyn Package, + ) -> crate::Result> { + let mut squats = Vec::new(); + + // `rebuild_name` slices by byte offset, so walk byte offsets and replace the full UTF-8 + // width of `c` rather than assuming one byte per character. + for (i, c) in name.char_indices() { + if let Some(typos) = self.typos.get(&c) { + for typo in typos.iter() { + let name_to_check = util::rebuild_name(name, i, c.len_utf8(), typo); + if corpus.possible_squat(&name_to_check, name, package)? 
{ + squats.push(Squat::Typo(name_to_check)); + } + } + } + } + + Ok(squats) + } +} + +#[cfg(test)] +mod tests { + use crate::checks::testutil::assert_check; + + use super::*; + + #[test] + fn test_typos() -> crate::Result<()> { + #[track_caller] + fn test(input: &str, want: &[&str]) -> crate::Result<()> { + assert_check( + Typos::new([('a', vec![String::from("ab"), String::from("b")])].into_iter()), + input, + want, + ) + } + + test("", &[])?; + test("x", &[])?; + test("a", &["ab", "b"])?; + test("xax", &["xabx", "xbx"])?; + + Ok(()) + } +} diff --git a/src/checks/util.rs b/src/checks/util.rs new file mode 100644 index 0000000..f130590 --- /dev/null +++ b/src/checks/util.rs @@ -0,0 +1,25 @@ +/// Returns `orig` with the `replace` bytes starting at byte offset `index` swapped for +/// `replacement`. If `index + replace` runs past the end of `orig` (or is not on a character +/// boundary), nothing is kept after the replacement. Panics if `index` itself is out of range or +/// not on a character boundary. +pub(super) fn rebuild_name(orig: &str, index: usize, replace: usize, replacement: &str) -> String { + format!( + "{before}{replacement}{after}", + before = &orig[0..index], + after = orig.get(index + replace..).unwrap_or(""), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_rebuild_name() { + assert_eq!("foobar", rebuild_name("foobar", 3, 0, "")); + assert_eq!("fooxbar", rebuild_name("foobar", 3, 0, "x")); + assert_eq!("fooxar", rebuild_name("foobar", 3, 1, "x")); + assert_eq!("fxbar", rebuild_name("foobar", 1, 2, "x")); + assert_eq!("fxxbar", rebuild_name("foobar", 1, 2, "xx")); + } +} diff --git a/src/checks/version.rs b/src/checks/version.rs new file mode 100644 index 0000000..a030f00 --- /dev/null +++ b/src/checks/version.rs @@ -0,0 +1,50 @@ +use super::{Check, Corpus, Package, Squat}; + +/// Checks whether a package only differs from a package in the corpus by omitting a version +/// number. 
+pub struct Version; + +impl Check for Version { + fn check( + &self, + corpus: &dyn Corpus, + name: &str, + package: &dyn Package, + ) -> crate::Result> { + // Strip a trailing run of ASCII digits, then any `-` that preceded it. + let trimmed = name + .trim_end_matches(&['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']) + .trim_end_matches('-'); + + // Skip names that trim down to nothing, or that were not changed by trimming. + Ok( + if !trimmed.is_empty() + && trimmed != name + && corpus.possible_squat(trimmed, name, package)? + { + vec![Squat::Version(trimmed.into())] + } else { + Vec::new() + }, + ) + } +} + +#[cfg(test)] +mod tests { + use crate::checks::testutil::assert_check; + + use super::*; + + #[test] + fn test_version() -> crate::Result<()> { + assert_check(Version, "", &[])?; + assert_check(Version, "-2", &[])?; + assert_check(Version, "2", &[])?; + assert_check(Version, "abc", &[])?; + assert_check(Version, "abc234", &["abc"])?; + assert_check(Version, "abc-234", &["abc"])?; + assert_check(Version, "abc-", &["abc"])?; + assert_check(Version, "abc0", &["abc"])?; + + Ok(()) + } +} diff --git a/src/corpus.rs b/src/corpus.rs new file mode 100644 index 0000000..370e1ef --- /dev/null +++ b/src/corpus.rs @@ -0,0 +1,61 @@ +//! The [`Corpus`] trait, and utility functions related to implementing it. + +use crate::{Package, Result}; + +/// A corpus of existing, popular packages that checks must be run against. +/// +/// This is implemented by default for `HashMap` and `BTreeMap`. +/// Users with more complex needs can adapt their own package sources, provided they return +/// [`Package`]. +pub trait Corpus: Send + Sync { + /// Returns whether the corpus has a package called `name`. + fn contains_name(&self, name: &str) -> Result; + + /// Returns the corpus package called `name`, or `None` if the corpus has no such package. + fn get(&self, name: &str) -> Result>; + + /// Checks if package `package`, identified by `package_name`, should be considered to be + /// squatting `corpus_name` — a package in the corpus. + /// + /// This can be used to implement filters based on ecosystem-specific knowledge that isn't + /// exposed in the generic [`Package`] trait. + /// + /// The default implementation is [`default_possible_squat`]. 
Implementors replacing the + /// default implementation may still want to invoke [`default_possible_squat`] before adding + /// their own filtering. + fn possible_squat( + &self, + corpus_name: &str, + package_name: &str, + package: &dyn Package, + ) -> Result { + default_possible_squat(self, corpus_name, package_name, package) + } +} + +/// The default implementation of [`Corpus::possible_squat`], split out for easier reuse in other +/// [`Corpus`] implementations. +/// +/// This implementation checks two things: +/// +/// 1. Is `corpus_name` the same as `package_name`? +/// 1. Does the package in the corpus share any authors with `package`? +/// +/// If either of these checks returns true, then this function returns `false`, as it's assumed +/// that a package cannot squat itself, and that an author cannot squat their own package. +/// +/// `false` is also returned if `corpus_name` is not in the corpus at all. +pub fn default_possible_squat( + corpus: &C, + corpus_name: &str, + package_name: &str, + package: &dyn Package, +) -> Result +where + C: Corpus + Send + Sync + ?Sized, +{ + Ok(if corpus_name == package_name { + // The same package can't squat itself. + false + } else if let Some(checked) = corpus.get(corpus_name)? { + // See if there are any shared authors. If not, then this might be squatted. + !checked.shared_authors(package.authors()) + } else { + // `corpus_name` isn't in the corpus, so there is nothing to squat. + false + }) +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..975fe3d --- /dev/null +++ b/src/error.rs @@ -0,0 +1,2 @@ +pub type BoxError = Box; +pub type Result = std::result::Result; diff --git a/src/harness.rs b/src/harness.rs new file mode 100644 index 0000000..a0c29c1 --- /dev/null +++ b/src/harness.rs @@ -0,0 +1,146 @@ +use std::marker::PhantomData; + +use itertools::Itertools; +use thiserror::Error; +use tracing::instrument; + +use crate::{ + checks::{Check, Repeated, Squat, SwappedCharacters, Version}, + BoxError, Corpus, Package, +}; + +/// A basic harness that runs its configured checks against one or more potentially typosquatted +/// packages. 
+/// +/// If the `rayon` feature is enabled, the [`Harness::check`] method can be used to check many +/// packages in parallel, using Rayon for parallelisation. +pub struct Harness +where + C: Corpus + Send + Sync, +{ + checks: Vec>, + corpus: C, +} + +/// A builder for [`Harness`]. +pub struct Builder +where + C: Corpus + Send + Sync, +{ + checks: Vec>, + _marker: PhantomData, +} + +impl Builder +where + C: Corpus + Send + Sync, +{ + /// Creates a builder preloaded with the [`Repeated`], [`SwappedCharacters`], and [`Version`] + /// checks. + fn new() -> Self { + let repeated: Box = Box::new(Repeated); + let swapped_chars: Box = Box::new(SwappedCharacters); + let version: Box = Box::new(Version); + + Self { + checks: Vec::from([repeated, swapped_chars, version]), + _marker: PhantomData, + } + } + + /// Creates a builder with no checks configured. + fn empty() -> Self { + Self { + checks: Vec::new(), + _marker: PhantomData, + } + } + + /// Adds a check to the harness. + pub fn with_check(mut self, check: Chk) -> Self + where + Chk: Check + 'static, + { + self.checks.push(Box::new(check)); + self + } + + /// Uses the given corpus to build a harness. + pub fn build(self, corpus: C) -> Harness + where + C: Corpus + Send + Sync + 'static, + { + Harness { + checks: self.checks, + corpus, + } + } +} + +impl Harness +where + C: Corpus + Send + Sync + 'static, +{ + /// Instantiates a builder with three checks configured by default: [`Repeated`], + /// [`SwappedCharacters`], and [`Version`]. + /// + /// These checks are provided by default because they don't require any specific knowledge of + /// the package ecosystem. + pub fn builder() -> Builder { + Builder::new() + } + + /// Instantiates a builder with no checks. + pub fn empty_builder() -> Builder { + Builder::empty() + } + + /// Checks all given packages against the corpus, using Rayon to parallelise the checks. 
+ #[cfg(feature = "rayon")] + #[instrument(level = "DEBUG", skip_all, err)] + pub fn check( + &self, + new_packages: impl Iterator)> + Send, + ) -> Result>, Error> { + use rayon::prelude::*; + + new_packages + // Bridge the sequential iterator into a Rayon parallel iterator. + .par_bridge() + .filter_map(|(name, package)| match self.check_package(&name, package) { + Ok(squats) if squats.is_empty() => None, + Ok(squats) => Some(Ok((name, squats))), + Err(e) => Some(Err(e)), + }) + .collect() + } + + /// Checks a single package against the corpus using the configured checks. + #[instrument(level = "TRACE", skip(self, package), err)] + pub fn check_package( + &self, + name: &str, + package: Box, + ) -> Result, Error> { + // A package that is itself in the corpus is never reported. + if self.corpus.contains_name(name)? { + return Ok(Vec::new()); + } + + self.checks + .iter() + .map(|check| -> Result, Error> { + Ok(check.check(&self.corpus, name, package.as_ref())?) + }) + .flatten_ok() + .collect() + } +} + +/// Error returned by [`Harness`] methods. +#[derive(Error, Debug)] +pub enum Error { + /// An error raised by the corpus or by a check, reduced to its display message. + #[error("corpus error: {0}")] + Corpus(String), +} + +impl From for Error { + fn from(value: BoxError) -> Self { + Self::Corpus(value.to_string()) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c6b9879 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,40 @@ +//! Checks and a harness to detect potential typosquatting in a package repository. +//! +//! This is ported from [`typogard`][typogard], originally by a team led by Matthew Taylor at the +//! University of Kansas and published alongside the [_Defending Against Package +//! Typosquatting_][paper] paper, and adapted by [Dan Gardner][dangardner] for crates.io +//! specifically. +//! +//! ## Theory of operation +//! +//! Given a [`Corpus`] of popular packages, the checks in the [`checks`] module allow new or +//! interesting packages to be matched against that corpus to look for common typosquatting +//! techniques. Custom checks may also be written by implementing [`checks::Check`]; custom checks +//! 
should use [`checks::Squat::Custom`] when returning potential typosquats. +//! +//! A [`Harness`] is provided that can be used to run a suite of checks against a single package, +//! or — when the `rayon` feature is enabled — against many packages at once in parallel. +//! +//! Checks and corpora both use instances of [`Package`], which provides a basic lowest common +//! denominator representation of ecosystem-specific packages. Users are expected to implement +//! [`Package`] (and the related [`AuthorSet`]) on their native package type for analysis. +//! +//! ## Tracing +//! +//! Potentially expensive operations are traced using `tracing` at the TRACE level, except for +//! [`Harness::check`], which is traced at the DEBUG level. +//! +//! [dangardner]: https://github.com/dangardner/typogard +//! [paper]: https://dl.acm.org/doi/10.1007/978-3-030-65745-1_7 +//! [typogard]: https://github.com/mt3443/typogard + +pub mod checks; +pub mod corpus; +mod error; +mod harness; +mod package; + +pub use corpus::Corpus; +pub use error::{BoxError, Result}; +pub use harness::{Builder as HarnessBuilder, Error as HarnessError, Harness}; +pub use package::{AuthorSet, Package}; diff --git a/src/package.rs b/src/package.rs new file mode 100644 index 0000000..f8076a1 --- /dev/null +++ b/src/package.rs @@ -0,0 +1,54 @@ +/// Common trait that packages must implement to provide common metadata used by checks and +/// corpora. +/// +/// Note that "author" is simply a string in this data model. However these are represented, these +/// need to be unique within the package ecosystem: registry user names, user IDs, or e-mail +/// addresses would tend to be reasonable candidates to represent an author. +pub trait Package: Send + Sync { + /// Returns an object that can be used to check if one or more authors own this package. 
+ /// + /// See the documentation for [`AuthorSet`] for more detail, but in most cases, this will be + /// implemented as: + /// + /// ```rust + /// # use typomania::{AuthorSet, Package}; + /// # + /// # struct MyPackage; + /// # + /// impl Package for MyPackage { + /// fn authors(&self) -> &dyn AuthorSet { + /// self + /// } + /// + /// // ... + /// # fn description(&self) -> Option<&str> { unimplemented!() } + /// # fn shared_authors(&self, other: &dyn AuthorSet) -> bool { unimplemented!() } + /// } + /// + /// impl AuthorSet for MyPackage { + /// fn contains(&self, author: &str) -> bool { + /// // ... + /// # unimplemented!() + /// } + /// } + /// ``` + fn authors(&self) -> &dyn AuthorSet; + + /// Returns the package description, if it has one. + /// + /// This isn't used by any check shipped by default in typomania, but may be useful for NLP + /// checks: packages that typosquat others will tend to replicate their descriptions, + /// summaries, and/or readmes to confuse their targets further. + fn description(&self) -> Option<&str>; + + /// Checks if any authors on the other [`AuthorSet`] match any authors on this package. + fn shared_authors(&self, other: &dyn AuthorSet) -> bool; +} + +/// Trait that packages must implement to check if they have a particular author. +/// +/// In the vast majority of cases, this will be implemented on the same type as [`Package`]: the +/// existence of this as a separate trait is an unfortunate implementation detail. +pub trait AuthorSet { + /// Returns `true` if `author` is one of the authors in this set. + fn contains(&self, author: &str) -> bool; +}