diff --git a/Pipfile b/Pipfile index 5f6775c..5db27a9 100644 --- a/Pipfile +++ b/Pipfile @@ -8,3 +8,5 @@ mechanicalsoup = "*" ics = ">=0.6" requests = "*" freezegun = "*" +pycryptodomex = "*" +PyNaCl = "*" diff --git a/fb2cal/facebook_browser.py b/fb2cal/facebook_browser.py index 9778d70..46f23b9 100644 --- a/fb2cal/facebook_browser.py +++ b/fb2cal/facebook_browser.py @@ -2,9 +2,10 @@ import re import requests import json +from bs4 import Tag from .logger import Logger -from .utils import remove_anti_hijacking_protection +from .utils import remove_anti_hijacking_protection, facebook_web_encrypt_password class FacebookBrowser: def __init__(self): @@ -14,47 +15,71 @@ def __init__(self): self.browser.set_user_agent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36') self.__cached_token = None - def authenticate(self, email, password): - """ Authenticate with Facebook setting up session for further requests """ - - FACEBOOK_LOGIN_URL = 'http://www.facebook.com/login.php' + def _get_datr_token_from_html(self, html): FACEBOOK_DATR_TOKEN_REGEXP = r'\"_js_datr\",\"(.*?)\"' regexp = re.compile(FACEBOOK_DATR_TOKEN_REGEXP, re.MULTILINE) - # Add 'datr' cookie to session for countries adhering to GDPR compliance - login_page = self.browser.get(FACEBOOK_LOGIN_URL) + matches = regexp.search(html) + + if not matches or len(matches.groups()) != 1: + self.logger.debug(html) + self.logger.error(f'Match failed or unexpected number of regexp matches when trying to get datr token.') + raise SystemError - if login_page.status_code != 200: - self.logger.debug(login_page.text) - self.logger.error(f'Failed to authenticate with Facebook with email {email}. Stage: Initial Request for datr Token, Status code: {login_page.status_code}.') + return matches[1] + + def _get_pubkey_from_html(self, html): + FACEBOOK_PUBKEY_REGEXP = r'\"pubKey\":{"publicKey":"(.+?)","keyId":(\d+?)}}' + regexp = re.compile(FACEBOOK_PUBKEY_REGEXP, re.MULTILINE) + + matches = regexp.search(html) + + if not matches or len(matches.groups()) != 2: + self.logger.debug(html) + self.logger.error(f'Match failed or unexpected number of regexp matches when trying to get pubKey.') raise SystemError + + public_key = matches[1] + key_id = int(matches[2]) - matches = regexp.search(login_page.text) + return (public_key, key_id) - if not matches or len(matches.groups()) != 1: + def authenticate(self, email, password): + """ Authenticate with Facebook setting up session for further requests """ + + FACEBOOK_LOGIN_URL = 'http://www.facebook.com/login' + + login_page = self.browser.open(FACEBOOK_LOGIN_URL) + + if login_page.status_code != 200: self.logger.debug(login_page.text) - self.logger.error(f'Match failed or unexpected number of regexp matches when trying to get datr token.') + self.logger.error(f'Failed to authenticate with Facebook with email {email}. Stage: Initial Request for datr Token, Status code: {login_page.status_code}.') raise SystemError - - _js_datr = matches[1] + + # Add 'datr' cookie to session for countries adhering to GDPR compliance + _js_datr = self._get_datr_token_from_html(login_page.text) datr_cookie = requests.cookies.create_cookie(domain='.facebook.com', name='datr', value=_js_datr) - _js_datr_cookie = requests.cookies.create_cookie(domain='.facebook.com', name='_js_datr', value=_js_datr) self.browser.get_cookiejar().set_cookie(datr_cookie) + + _js_datr_cookie = requests.cookies.create_cookie(domain='.facebook.com', name='_js_datr', value=_js_datr) self.browser.get_cookiejar().set_cookie(_js_datr_cookie) - # Perform main login now - login_page = self.browser.get(FACEBOOK_LOGIN_URL) + # Prepare to send form + login_form = self.browser.select_form("form#login_form") + login_form.set("email", email) - if login_page.status_code != 200: - self.logger.debug(login_page.text) - self.logger.error(f'Failed to authenticate with Facebook with email {email}. Stage: Main Login Attempt, Status code: {login_page.status_code}.') - raise SystemError + # Encrypt password into enc_pass + # Facebook only accepts encrypted passwords in a specific format + public_key, key_id = self._get_pubkey_from_html(login_page.text) + enc_pass = facebook_web_encrypt_password(key_id, public_key, password) + + # enc_pass is typically computed and included in requests pre-flight with javascript + # Since we aren't executing javascript we'll just create the input field and include it here so it makes it into our request + enc_pass_input = Tag(name="input", attrs={"type": "hidden", "name": "encpass", "value": enc_pass}) + login_form.form.append(enc_pass_input) - login_form = login_page.soup.find('form', {'id': 'login_form'}) - login_form.find('input', {'id': 'email'})['value'] = email - login_form.find('input', {'id': 'pass'})['value'] = password - login_response = self.browser.submit(login_form, login_page.url) + login_response = self.browser.submit_selected() if login_response.status_code != 200: self.logger.debug(login_response.text) diff --git a/fb2cal/utils.py b/fb2cal/utils.py index dbe9a37..d2d64d3 100644 --- a/fb2cal/utils.py +++ b/fb2cal/utils.py @@ -1,3 +1,12 @@ +import base64 +import struct +import datetime +import binascii + +from Cryptodome import Random +from Cryptodome.Cipher import AES +from nacl.public import PublicKey, SealedBox + from .facebook_user import FacebookUser # Generates permalink to Facebook profile url @@ -9,3 +18,33 @@ def generate_facebook_profile_url_permalink(facebook_user: FacebookUser): # It must be stripped away before parsing a response as JSON def remove_anti_hijacking_protection(text: str): return text.removeprefix("for (;;);") + +# Encryption used on plain text passwords before they are sent to Facebook. +# This function uses the #PWD_BROWSER type which is for Facebook Web requests. +# +# Credits to Lorenzo Di Fuccia: https://gist.github.com/lorenzodifuccia/c857afa47ede66db852e6a25c0a1a027 +# +# TODO: Avoid hardcoding the version 5 (instagram has: https://www.instagram.com/data/shared_data/) +def facebook_web_encrypt_password(key_id, pub_key, password, version=5): + key = Random.get_random_bytes(32) + iv = bytes([0] * 12) + + time = int(datetime.datetime.now().timestamp()) + + aes = AES.new(key, AES.MODE_GCM, nonce=iv, mac_len=16) + aes.update(str(time).encode('utf-8')) + encrypted_password, cipher_tag = aes.encrypt_and_digest(password.encode('utf-8')) + + pub_key_bytes = binascii.unhexlify(pub_key) + seal_box = SealedBox(PublicKey(pub_key_bytes)) + encrypted_key = seal_box.encrypt(key) + + encrypted = bytes([1, + key_id, + *list(struct.pack('=0.6 requests -freezegun \ No newline at end of file +freezegun +pycryptodomex +PyNaCl \ No newline at end of file