forked from graph-knowledgegraph/KDD2019-HandsOn-Tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path3.NetworkSimilarityDemo.py
106 lines (72 loc) · 3.24 KB
/
3.NetworkSimilarityDemo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Databricks notebook source
# MAGIC %md # Module III. Network Similarity
# COMMAND ----------
# MAGIC %run "./TutorialClasses"
# COMMAND ----------
# Connection settings for the Azure Storage container holding the MAG data.
# All three values are passed unchanged to MicrosoftAcademicGraph and
# NetworkSimilarity in the cells below (as account=, container= and sas=).
#
# Azure Storage account name
MagAccount = 'kdd2019magstore'
# MAG container name
MagContainer = 'mag-2019-06-07'
# Shared access signature of MAG Container
# NOTE(review): the token carries st=2019-07-31 and se=2019-08-10; if these are
# the usual SAS start/expiry fields the token has lapsed and must be replaced
# with a current one before the notebook can read the container -- confirm.
# NOTE(review): this is a credential committed in source; consider loading it
# from a secret store instead of a literal.
MagSAS = '?sv=2018-03-28&ss=bfqt&srt=sco&sp=rl&se=2019-08-10T02:22:11Z&st=2019-07-31T18:22:11Z&spr=https&sig=m8XIxbDhk3ZOBt5ceVYIodw3k0JhbXodUBZDxXOThcs%3D'
# COMMAND ----------
# Handle on the MAG container (MicrosoftAcademicGraph is presumably provided by
# the TutorialClasses notebook pulled in via %run -- it is not defined here).
mag = MicrosoftAcademicGraph(
    container=MagContainer,
    account=MagAccount,
    sas=MagSAS,
)

# Entity tables; the sections below join against them to attach names to ids.
_load_table = mag.getDataframe
Affiliations = _load_table('Affiliations')
ConferenceSeries = _load_table('ConferenceSeries')
FieldsOfStudy = _load_table('FieldsOfStudy')
Journals = _load_table('Journals')
del _load_table  # keep the notebook namespace free of the temporary alias
# COMMAND ----------
# MAGIC %md ## Similar Affiliations
# COMMAND ----------
# Affiliation embedding resource, opened with the same container credentials.
ResourcePath = 'ns/AffiliationEmbedding_d100.tsv'
ns = NetworkSimilarity(
    resource=ResourcePath,
    container=MagContainer,
    account=MagAccount,
    sas=MagSAS,
)
df = ns.getDataframe()
display(df)
# COMMAND ----------
# Score for one pair of affiliations.
id1, id2 = 1290206253, 136199984  # Microsoft, Harvard University
print(ns.getSimilarity(id1, id2))
# COMMAND ----------
# Top entities returned for id1, labelled with the affiliation display name
# and listed from highest to lowest Score.
topEntities = ns.getTopEntities(id1)
topEntitiesWithName = (
    topEntities
    .join(
        Affiliations,
        on=topEntities.EntityId == Affiliations.AffiliationId,
        how='inner',
    )
    .select(
        Affiliations.AffiliationId,
        Affiliations.DisplayName,
        topEntities.EntityType,
        topEntities.Score,
    )
    .orderBy(topEntities.Score.desc())
)
display(topEntitiesWithName)
# COMMAND ----------
# MAGIC %md ## Similar Venues
# COMMAND ----------
# Venue embedding resource, opened with the same container credentials.
ResourcePath = 'ns/VenueEmbedding_d100.tsv'
ns = NetworkSimilarity(
    resource=ResourcePath,
    container=MagContainer,
    account=MagAccount,
    sas=MagSAS,
)
df = ns.getDataframe()
# Preview is disabled in this section; uncomment to inspect: display(df)
# COMMAND ----------
# Score for one conference / journal pair.
id1, id2 = 1130985203, 137773608  # KDD, Nature
print(ns.getSimilarity(id1, id2))
# COMMAND ----------
# Build a single venue table: conference series and journals share the
# columns (VenueId, NormalizedName, DisplayName), in that order, so the
# positional union lines up.
conf = ConferenceSeries.select(
    ConferenceSeries.ConferenceSeriesId.alias('VenueId'),
    ConferenceSeries.NormalizedName,
    ConferenceSeries.DisplayName,
)
jour = Journals.select(
    Journals.JournalId.alias('VenueId'),
    Journals.NormalizedName,
    Journals.DisplayName,
)
venues = conf.union(jour)
# COMMAND ----------
# Top entities returned for id1, labelled with venue names and listed from
# highest to lowest Score.
topEntities = ns.getTopEntities(id1)
topEntitiesWithName = (
    topEntities
    .join(
        venues,
        on=topEntities.EntityId == venues.VenueId,
        how='inner',
    )
    .select(
        venues.VenueId,
        venues.NormalizedName,
        venues.DisplayName,
        topEntities.EntityType,
        topEntities.Score,
    )
    .orderBy(topEntities.Score.desc())
)
display(topEntitiesWithName)
# COMMAND ----------
# MAGIC %md ## Similar Fields of Study
# COMMAND ----------
# Field-of-study embedding resource, opened with the same container credentials.
ResourcePath = 'ns/FosEmbedding_d100.tsv'
ns = NetworkSimilarity(
    resource=ResourcePath,
    container=MagContainer,
    account=MagAccount,
    sas=MagSAS,
)
df = ns.getDataframe()
# COMMAND ----------
# Score for one pair of fields of study.
id1, id2 = 124101348, 108583219  # Data mining, Deep learning
print(ns.getSimilarity(id1, id2))
# COMMAND ----------
# Top entities returned for id1, labelled with the field display name and
# listed from highest to lowest Score.
topEntities = ns.getTopEntities(id1)
topEntitiesWithName = (
    topEntities
    .join(
        FieldsOfStudy,
        on=topEntities.EntityId == FieldsOfStudy.FieldOfStudyId,
        how='inner',
    )
    .select(
        FieldsOfStudy.FieldOfStudyId,
        FieldsOfStudy.DisplayName,
        topEntities.EntityType,
        topEntities.Score,
    )
    .orderBy(topEntities.Score.desc())
)
display(topEntitiesWithName)