-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelper.py
101 lines (91 loc) · 3.12 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from requests_html import HTMLSession, HTML
from urllib.parse import quote_plus
import requests
import re
def check_if_valid(token):
    """Ask the Graph API ``debug_token`` endpoint whether *token* is valid.

    The token is used both as the token under inspection (``input_token``)
    and as the credential for the call (``access_token``).

    Args:
        token: Facebook access token to check.

    Returns:
        The ``data.is_valid`` value from the JSON response, or ``False`` when
        the response is not JSON or does not have that shape.

    Raises:
        requests.RequestException: if the HTTP request itself fails
            (network errors are not swallowed).
    """
    # Let requests build the query string so that characters such as '&',
    # '#' or '|' inside the token are percent-encoded instead of corrupting
    # the URL.
    resp = requests.get(
        'https://graph.facebook.com/v15.0/debug_token',
        params={'input_token': token, 'access_token': token},
    )
    try:
        return resp.json()['data']['is_valid']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape
        # (e.g. an error payload without a "data" object).
        return False
def post_fb(message, token, page_id='113023048137080'):
    """Publish *message* to a Facebook page feed through the Graph API.

    Args:
        message: Text of the post.
        token: Access token used to authorise the request.
        page_id: Id of the page whose ``/feed`` edge receives the post.
            Defaults to the page this helper originally targeted.

    Returns:
        ``True`` when the API answers with HTTP 200, ``False`` otherwise.

    Raises:
        requests.RequestException: if the HTTP request itself fails.
    """
    # Passing the values through ``params`` percent-encodes both the message
    # and the token; previously only the message was escaped.
    resp = requests.post(
        f'https://graph.facebook.com/v15.0/{page_id}/feed',
        params={'message': message, 'access_token': token},
    )
    return resp.status_code == 200
class FB_Scrapper:
    """Collect text-only posts from a timeline served by mbasic.facebook.com.

    Creating an instance performs the scrape immediately; the outcome is
    stored in ``posts``.

    Attributes:
        page_id: Value sent as ``profile_id`` in every timeline request.
        post_amount: Number of posts after which pagination stops.
        cookie: Cookies sent with every request, as returned by
            ``convert_to_dict`` (a dict, or ``False`` if parsing failed).
        cursor: Pagination cursor used for the next request.
        session: ``HTMLSession`` used for all requests.
        posts: List of ``FB_Post`` objects produced by ``get_posts``.
    """

    # Selector of the element that carries a post's text (the same element
    # FB_Post reads its ``content`` from).
    _TEXT_SELECTOR = 'div[data-ft=\'{"tn":"*s"}\']'

    def __init__(self, page_id: str, post_amount: int, cookie: str):
        """Store the settings and run the scrape.

        Args:
            page_id: Id of the profile/page whose timeline is fetched.
            post_amount: Minimum number of posts to gather before stopping.
            cookie: Raw cookie string in ``name=value; name2=value2`` form.
        """
        self.page_id = page_id
        self.post_amount = post_amount
        self.cookie = convert_to_dict(cookie)
        # Placeholder cursor for the very first request.
        self.cursor = 'abcdefg'
        self.session = HTMLSession()
        self.posts = self.get_posts()

    def get_posts(self) -> list:
        """Page through the timeline and return the qualifying posts.

        A post qualifies when it has no ``img[alt]`` element and its text
        container holds no link. Pagination stops when at least
        ``post_amount`` posts were gathered, when the page offers no further
        cursor, or when the cursor stops changing.

        Returns:
            A list of ``FB_Post``. The limit is checked once per page, so the
            list may hold more than ``post_amount`` entries. It may hold
            fewer when the timeline is exhausted. An empty list is returned
            (and the error printed) if any exception occurs.
        """
        posts = []
        try:
            while True:
                previous_cursor = self.cursor
                resp = self.send_request()

                # ``search`` yields None when the page carries no cursor,
                # i.e. there is nothing more to load.
                found = resp.search('cursor={}&')
                if found is not None:
                    self.cursor = found[0]

                for article in resp.find('article'):
                    matches = article.find('article')
                    if len(matches) == 2:
                        # Two matches: the second one is treated as the post
                        # (presumably a shared post nested in its wrapper —
                        # confirm against requests_html's find semantics).
                        inner = matches[1]
                        if self._is_plain_text(inner):
                            posts.append(FB_Post(inner))
                    elif self._is_plain_text(article):
                        # Build the post once and skip it if an equal one
                        # (same content) was already collected.
                        post = FB_Post(article)
                        if post not in posts:
                            posts.append(post)

                if len(posts) >= self.post_amount:
                    return posts
                if found is None or self.cursor == previous_cursor:
                    # No further page (or the same page again): hand back
                    # what was gathered instead of failing or looping forever.
                    return posts
        except Exception as e:
            print('Error:', str(e), self.page_id)
            return []

    @classmethod
    def _is_plain_text(cls, element) -> bool:
        """Return True if *element* has no ``img[alt]`` and a link-free text container."""
        if element.find('img[alt]'):
            return False
        text_box = element.find(cls._TEXT_SELECTOR, first=True)
        # Elements without a text container are skipped rather than allowed
        # to abort the whole scrape.
        return text_box is not None and not text_box.find('a')

    def send_request(self) -> HTML:
        """Fetch one timeline page for the current cursor and return its parsed HTML."""
        # The cursor is inserted verbatim: it was taken from a URL in the
        # previous page, so re-encoding it could alter it.
        resp = self.session.get(f'https://mbasic.facebook.com/profile/timeline/stream/?cursor={self.cursor}&profile_id={self.page_id}', cookies=self.cookie)
        return resp.html
class FB_Post:
    """A single post extracted from an ``article`` element.

    Attributes:
        attr_id: The element's ``id`` attribute.
        attr_class: The element's classes joined with single spaces.
        author: Text of the first ``h3`` inside the element.
        content: Text of the element's post-text container.

    Two posts are equal when their ``content`` is equal.
    """

    # Equality is content-based and instances are mutable, so they are
    # deliberately unhashable (this is also what defining __eq__ implies).
    __hash__ = None

    def __init__(self, obj):
        """Read id, classes, author and text from *obj*.

        Args:
            obj: Element exposing ``attrs`` (mapping with ``id`` and
                ``class``) and ``find(selector, first=True)``.

        Raises:
            KeyError: if ``id`` or ``class`` is missing from ``attrs``.
            AttributeError: if the ``h3`` or the text container is absent
                (``find`` then yields ``None``).
        """
        self.attr_id = obj.attrs['id']
        self.attr_class = ' '.join(obj.attrs['class'])
        self.author = obj.find('h3', first=True).text
        self.content = obj.find('div[data-ft=\'{"tn":"*s"}\']', first=True).text

    def __repr__(self):
        """Return the post text."""
        return self.content

    def __eq__(self, other):
        """Compare by ``content``; defer to Python for objects without one."""
        if not hasattr(other, 'content'):
            # Lets ``post == "text"`` evaluate to False instead of raising.
            return NotImplemented
        return self.content == other.content
def convert_to_dict(cookie: str) -> dict:
    """Parse a ``name=value; name2=value2`` cookie string into a dict.

    Spaces and newlines are removed before parsing and empty segments
    (for example from a trailing or doubled ``;``) are ignored. Only the
    first ``=`` of a segment separates name from value, so values that
    themselves contain ``=`` are kept intact.

    Args:
        cookie: Raw cookie string.

    Returns:
        A dict mapping cookie names to values, or ``False`` when *cookie* is
        not a string or a non-empty segment has no ``=``.
    """
    try:
        compact = cookie.replace(' ', '').replace('\n', '')
    except (AttributeError, TypeError):
        # Not a str (e.g. None or bytes): keep the historical False result.
        return False

    fb_cookies_dict = {}
    for item in compact.split(';'):
        if not item:
            continue
        name, separator, value = item.partition('=')
        if not separator:
            # A segment without '=' is malformed; reject the whole string as
            # before.
            return False
        fb_cookies_dict[name] = value
    return fb_cookies_dict
def check_acc_ie(cookies:str):
    """Fetch a fixed mbasic profile page with *cookies* and extract a numeric id.

    The page source is searched for several ``<key>=<digits>`` markers in
    priority order; the digits of the first marker found are returned.

    Args:
        cookies: Raw cookie string, parsed with ``convert_to_dict``.

    Returns:
        The id as a string of digits, or ``False`` when no marker is present
        or the request/parsing fails.
    """
    # Tried in this order; the first pattern that matches wins.
    id_patterns = (
        r'profile_id=\d+',
        r'owner_id=\d+',
        r'confirm/\?bid=\d+',
        r'subscribe\.php\?id=\d+',
        r'subject_id=\d+',
        r'poke_target=\d+',
    )
    try:
        resp = requests.get('https://mbasic.facebook.com/smshahriar.zarir.94', cookies=convert_to_dict(cookies))
        html = resp.text
        for pattern in id_patterns:
            found = re.search(pattern, html)
            if found:
                # Every pattern contains exactly one '=', directly before
                # the digits.
                return found.group(0).split('=')[1]
        return False
    except Exception:
        # Best-effort check: any request or parsing failure means "no id".
        return False