-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathorg_social_links.py
155 lines (141 loc) · 6.73 KB
/
org_social_links.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import html
import math
import os

import requests
# Credentials of the logged-in wiki account used for the edit requests.
# The "***" values are presumably redacted placeholders - fill in before running.
# Session and wpEditToken are updated regularly by MediaWiki, so stale values
# must be refreshed: retrieve them from the cookie header and the POST
# parameters sent by the browser on a save page request.
session = "***"  # sent as the "u216253868_bciwiki_session" cookie
edittoken = "***"  # sent as the "wpEditToken" request parameter
wikitoken = "***"  # sent as the "u216253868_bciwikiToken" cookie
# Get links from mediawiki category page content
def getLinks(r, skip_trailing=8):
    """Return the wiki page hrefs found in a category page's HTML.

    The HTML is cut at every ``</h3>``. The chunk before the first heading
    and the last ``skip_trailing`` chunks are ignored; from the remaining
    chunks every ``href="..."`` value that contains ``index.php/`` is
    collected.

    Args:
        r: HTML of the category page as a string.
        skip_trailing: Number of trailing ``</h3>``-delimited chunks to
            ignore. The default of 8 reproduces the slice this script has
            always used (presumably page footer/navigation sections -
            TODO confirm against the wiki skin).

    Returns:
        List of href values in document order; duplicates are kept.
    """
    chunks = r.split("</h3>")
    # Clamp at 0 so short pages yield no sections, exactly like chunks[1:-8].
    stop = max(len(chunks) - skip_trailing, 0)
    orgs = []
    for section in chunks[1:stop]:
        for fragment in section.split('href="')[1:]:
            href = fragment.split('"')[0]
            # Test the href itself, not the whole fragment: the fragment also
            # holds the link text and following markup, which could mention
            # "index.php/" even though the link points elsewhere.
            if "index.php/" in href:
                orgs.append(href)
    return orgs
# Create link to add to page text
def getSocialAddition(link, dork, ignorelist, linktitle, timeout=30):
    """Build a wikitext external link to the first ``dork`` URL found on a page.

    Downloads ``link``, looks for the first occurrence of ``dork`` (a domain
    prefix such as ``"twitter.com/"``) and takes the text that follows it up
    to the next single or double quote as the account handle.

    Args:
        link: URL of the page to scan.
        dork: Domain prefix to search for; it is also used to build the
            resulting ``https://`` URL.
        ignorelist: Substrings that disqualify a handle (share widgets,
            embedded scripts and the like).
        linktitle: Label shown for the generated link.
        timeout: Seconds to wait for the page before giving up.

    Returns:
        ``"[https://<dork><handle> <linktitle>]"``, or ``""`` when the page
        cannot be fetched, does not mention ``dork``, or the handle is empty
        or matches an entry of ``ignorelist``.
    """
    try:
        # Use the decoded text. str() on the raw bytes would produce the
        # "b'...'" repr, in which quotes and newlines appear backslash-escaped.
        page = requests.get(link, timeout=timeout).text
    except (requests.RequestException, UnicodeError):
        # Deliberately best effort: an unreachable or malformed site simply
        # contributes no link. UnicodeError is kept as a guard for host names
        # that cannot be encoded - NOTE(review): confirm it can still surface.
        return ""
    if dork not in page:
        return ""
    tail = page.split(dork)[1]
    # The handle ends at whichever quote character comes first; if there is
    # no quote at all the whole remainder is used.
    quotes = [pos for pos in (tail.find('"'), tail.find("'")) if pos != -1]
    handle = tail[:min(quotes)] if quotes else tail
    if not handle:
        # A bare link to the network's home page is not an account.
        return ""
    if any(ignoreitem in handle for ignoreitem in ignorelist):
        return ""
    return "[https://"+dork+handle+" "+linktitle+"]"
# Collect list of org links
# The category listing is split over several pages; the number of pages is
# derived from the announced total at 200 entries per page, and each further
# page is reached through the link that follows the ') (<a href="' marker.
resp = requests.get("https://bciwiki.org/index.php/Category:Organizations").text
organizations = []
organizations += getLinks(resp)
if "out of " in resp:
    outof = resp.split("out of ")[1]
    # Drop thousands separators (e.g. "1,234"), which float() cannot parse.
    outof = float(outof.split(" ")[0].replace(",", ""))
else:
    # No total announced: treat the first page as the only one.
    outof = 0.0
pagecount = outof/200
pagecount = math.ceil(pagecount)
while pagecount > 1:
    if ') (<a href="' not in resp:
        # No link to a further page: stop instead of requesting the same URL
        # again and appending its entries a second time.
        break
    nextpage = resp.split(') (<a href="')[1]
    nextpage = nextpage.split('"')[0]
    # Turn the HTML-escaped "&amp;" separators of the href back into "&".
    nextpage = nextpage.replace("amp;", "")
    nextpage = nextpage.replace("amp%", "")
    resp = requests.get("https://bciwiki.org"+nextpage).text
    organizations += getLinks(resp)
    pagecount -= 1
# Find social links and add to organization pages
# For every organization: take the website linked from its wiki page, scan
# that website for social media accounts, then append the missing links and
# the matching categories to the page's wikitext and save it.
intext = []
print(len(organizations))
# The wpUnicodeCheck value is the same for every edit: read it once, and let
# the context manager close the file.
with open("unicode.txt", mode="r", encoding="utf-8") as unicodefile:
    unicodecheck = unicodefile.read()
# Domains whose presence on the wiki page makes a category check worthwhile.
socialdomains = ("linkedin.com/", "twitter.com/", "facebook.com/", "instagram.com/", "youtube.com/", "github.com/")
for a, orgpath in enumerate(organizations):
    if a % 25 == 0:
        print(a)
    # Third "/"-separated piece of the href (assumes "/index.php/<title>").
    title = orgpath.split("/")[2]
    pagehtml = requests.get("https://bciwiki.org"+orgpath).text
    extlinks = pagehtml.split('"external text" href="')
    if len(extlinks) < 3:
        # The second external link is used as the organization's website
        # (NOTE(review): confirm why the first one is skipped). Without it
        # there is nothing to scan, so skip the page instead of crashing.
        print(title+" - no website link")
        continue
    # Attribute values are HTML-escaped in the page source.
    link = html.unescape(extlinks[2].split('"')[0])
    twitteradd = getSocialAddition(link, "twitter.com/", ["Tweets by ", "http", " ", "status/", "share", "squarespace", ".js"], "Twitter")
    fbadd = getSocialAddition(link, "facebook.com/", ["/fbml", "http", " ", "plugin", "tr?", "squarespace"], "Facebook")
    igadd = getSocialAddition(link, "instagram.com/", ["http", " ", "squarespace"], "Instagram")
    ytadd = getSocialAddition(link, "youtube.com/", ["http", " ", "watch?v=", "embed/", "squarespace", "iframe_api"], "YouTube")
    ghadd = getSocialAddition(link, "github.com/", ["http", " ", "squarespace"], "GitHub")
    foundlink = any((twitteradd, fbadd, igadd, ytadd, ghadd))
    # Check this organization's own page, not a response left over from an
    # earlier request.
    alreadylinked = any(domain in pagehtml for domain in socialdomains)
    if not foundlink and not alreadylinked:
        continue
    # Include links and relevant categories in page text
    editpage = requests.get("https://bciwiki.org/index.php?title="+title+"&action=edit").text
    if 'wpTextbox1">' not in editpage:
        # No edit box in the response: report the title and move on.
        print(title)
        continue
    wikitext = editpage.split('wpTextbox1">')[1]
    wikitext = wikitext.split('</textarea>')[0]
    # A newline directly after the <textarea> start tag is not part of the
    # value, and the content itself is HTML-escaped; undo both so the text
    # that is saved equals the stored wikitext plus the additions.
    if wikitext.startswith("\n"):
        wikitext = wikitext[1:]
    wikitext = html.unescape(wikitext)
    newtext = wikitext
    # (domain, link to append, category, label used in the log line).
    # LinkedIn has no scraped link, only a category.
    for domain, addition, category, label in (
        ("twitter.com/", twitteradd, "[[Category:Twitter Accounts]]", "twitter"),
        ("facebook.com/", fbadd, "[[Category:Facebook Pages]]", "facebook"),
        ("instagram.com/", igadd, "[[Category:Instagram Pages]]", "instagram"),
        ("youtube.com/", ytadd, "[[Category:YouTube Channels]]", "youtube"),
        ("github.com/", ghadd, "[[Category:GitHub Accounts]]", "github"),
        ("linkedin.com/", "", "[[Category:LinkedIn Accounts]]", "linkedin"),
    ):
        # Only add a link when the page does not reference the network yet.
        if domain not in wikitext:
            newtext += addition
        if category not in wikitext and domain in newtext:
            newtext = category+"\n"+newtext
            print(orgpath+" - "+label+" category")
    if newtext == wikitext:
        # Nothing to add: do not save an unchanged page.
        continue
    # Update page text
    # NOTE(review): the form fields travel in the query string (params=)
    # with a text/html content type; kept as is because the server-side
    # handling cannot be verified from here.
    requests.post("https://bciwiki.org/index.php?title="+title+"&action=submit", params={
        "wpUnicodeCheck": unicodecheck,
        "wpSave": "Save changes",
        "format": "text/x-wiki",
        "model": "wikitext",
        "editingStatsId": "7f2242361389761f3b10bafae723dc66",
        "wpStarttime": "",
        "wpEdittime": "",
        "wpTextbox1": newtext,
        "wpEditToken": edittoken,
        "wpWatchthis": "",
        "wpUltimateParam": "1",
        "mode": "text"}, cookies={
        "ls_smartpush": "1",
        "mw_installer_session": "12a3a7a4b67dc73e0caf0771faa52610",
        "u216253868_bciwiki_session": session,
        "u216253868_bciwikiToken": wikitoken,
        "u216253868_bciwikiUserID": "1",
        "u216253868_bciwikiUserName": "Landonodnal",
        "VEE": "wikitext",
        "wikiEditor-0-toolbar-section": "advanced"
    }, headers={'Content-type': 'text/html; charset=utf-8'})