From cc3498af87d7df711ee42490cbac01d5831ac4ae Mon Sep 17 00:00:00 2001 From: minamotorin <76122224+minamotorin@users.noreply.github.com> Date: Sun, 2 Jan 2022 15:00:44 +0000 Subject: [PATCH 1/2] Fix RefreshTokenException('Could not find the Guest token in HTML') ABOUT THE PROBLEM This problem has recently begun to occur in some environments. It doesn't happen every time, so if you are lucky, you don't get the error. The cause is literally that twint could not find the Guest token in HTML. Recently, the token is sometimes not included in the HTML. #!/usr/bin/env python3 # This program is WTFPL. import requests res = requests.get('https://twitter.com') print(res.text.split('\n')[-1]) twint requires the output of the above code to contain the Guest token, not just })();. However, sometimes the output is only })(); and the Guest token is missing. ABOUT THE SOLUTION In this patch, twint gets the Guest token from https://api.twitter.com/1.1/guest/activate.json if it could not find one in the HTML. The author referred to the code of gallery-dl: https://github.com/mikf/gallery-dl/blob/47eae4c393f09937a5dbcc2cb978702fb173e747/gallery_dl/extractor/twitter.py#L780-L783 Author's note: > I don't understand session of requests, so the code may be not good. > I hope someone rewrite the patch better and create a pull request. This commit was adopted from: https://github.com/twintproject/twint/issues/1320#issuecomment-1003094346 Closes https://github.com/twintproject/twint/issues/1320. 
--- twint/token.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/twint/token.py b/twint/token.py index ae66a24a..2eedcee4 100644 --- a/twint/token.py +++ b/twint/token.py @@ -65,5 +65,30 @@ def refresh(self): logme.debug('Found guest token in HTML') self.config.Guest_token = str(match.group(1)) else: - self.config.Guest_token = None - raise RefreshTokenException('Could not find the Guest token in HTML') + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0', + 'authority': 'api.twitter.com', + 'content-length': '0', + 'authorization': self.config.Bearer_token, + 'x-twitter-client-language': 'en', + 'x-csrf-token': res.cookies.get("ct0"), + 'x-twitter-active-user': 'yes', + 'content-type': 'application/x-www-form-urlencoded', + 'accept': '*/*', + 'sec-gpc': '1', + 'origin': 'https://twitter.com', + 'sec-fetch-site': 'same-site', + 'sec-fetch-mode': 'cors', + 'sec-fetch-dest': 'empty', + 'referer': 'https://twitter.com/', + 'accept-language': 'en-US', + } + self._session.headers.update(headers) + req = self._session.prepare_request(requests.Request('POST', 'https://api.twitter.com/1.1/guest/activate.json')) + res = self._session.send(req, allow_redirects=True, timeout=self._timeout) + match = re.search(r'{"guest_token":"(\d+)"}', res.text) + if match: + self.config.Guest_token = str(match.group(1)) + else: + self.config.Guest_token = None + raise RefreshTokenException('Could not find the Guest token in HTML') From f2a0d8cd76f49a59c74f4e911df4a0afadd0531f Mon Sep 17 00:00:00 2001 From: codeghees <40551128+codeghees@users.noreply.github.com> Date: Wed, 12 Jan 2022 20:10:06 +0500 Subject: [PATCH 2/2] Update user.py --- twint/user.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/twint/user.py b/twint/user.py index 6fb1e58a..9391451f 100644 --- a/twint/user.py +++ b/twint/user.py @@ -28,6 +28,9 @@ def User(ur): _usr.username = 
ur['data']['user']['legacy']['screen_name'] _usr.bio = ur['data']['user']['legacy']['description'] _usr.location = ur['data']['user']['legacy']['location'] + _usr.url = "" + if 'url' in ur['data']['user']['legacy']: + _usr.url = ur['data']['user']['legacy']['url'] _usr.url = ur['data']['user']['legacy']['url'] # parsing date to user-friendly format _dt = ur['data']['user']['legacy']['created_at'] @@ -46,7 +49,9 @@ def User(ur): _usr.is_private = ur['data']['user']['legacy']['protected'] _usr.is_verified = ur['data']['user']['legacy']['verified'] _usr.avatar = ur['data']['user']['legacy']['profile_image_url_https'] - _usr.background_image = ur['data']['user']['legacy']['profile_banner_url'] + _usr.background_image = "" + if 'profile_banner_url' in ur['data']['user']['legacy']: + _usr.background_image = ur['data']['user']['legacy']['profile_banner_url'] # TODO : future implementation # legacy_extended_profile is also available in some cases which can be used to get DOB of user return _usr