Skip to content

Commit

Permalink
use user id as node label
Browse files Browse the repository at this point in the history
  • Loading branch information
JoanMassachs committed May 12, 2022
1 parent 5ac8219 commit 296ac4c
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 13 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,13 @@ larger than 10:

The possible node attributes are the following:
- `screen_name`:
When the node is a user, its username.
When the node is a user, its username;
by default, it is used as the label of the nodes.
When the node is a tweet, the username of its author.
- `user_id`:
When the node is a user, its id.
When the node is a user, its id;
to use it as the node label instead of the username,
pass the `--id-as-label` flag.
When the node is a tweet, the id of its author.
- `start_date`:
The date of the first interaction that made the node appear in the graph.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="twarc-network",
version="0.1.0",
version="0.1.1",
url="https://github.com/docnow/twarc-network",
author="Ed Summers",
author_email="[email protected]",
Expand Down
30 changes: 20 additions & 10 deletions twarc_network/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,16 @@
)
@click.option("--min-component-size", type=int, help="Minimum weakly connected component size to include")
@click.option("--max-component-size", type=int, help="Maximum weakly connected component size to include")
@click.option("--id-as-label", is_flag=True, help="Use user id as node label")
@click.argument("infile", type=click.File("r"), default="-")
@click.argument("outfile", type=click.File("w"), default="-")
def network(format, nodes, edges, infile, outfile, min_component_size, max_component_size):
def network(format, nodes, edges, infile, outfile, min_component_size,
max_component_size, id_as_label):
"""
Generates a network graph of tweets as GEXF, GML, DOT, JSON, HTML, CSV.
"""

g = get_graph(infile, nodes, edges)
g = get_graph(infile, nodes, edges, id_as_label)

# if the user wants to limit component min/max sizes
if min_component_size or max_component_size:
Expand Down Expand Up @@ -78,7 +80,7 @@ def network(format, nodes, edges, infile, outfile, min_component_size, max_compo
outfile.write(html)


def get_graph(infile, nodes_type, edge_types):
def get_graph(infile, nodes_type, edge_types, id_as_label):
g = networkx.DiGraph()

for line in infile:
Expand Down Expand Up @@ -113,6 +115,7 @@ def get_graph(infile, nodes_type, edge_types):
edge_type,
created_at_date,
edge_types,
id_as_label,
)
if "mention" in edge_types:
mentions = t.get("entities", dict()).get("mentions", [])
Expand All @@ -130,6 +133,7 @@ def get_graph(infile, nodes_type, edge_types):
"mention",
created_at_date,
edge_types,
id_as_label,
)

elif nodes_type == "tweets":
Expand Down Expand Up @@ -175,32 +179,38 @@ def get_graph(infile, nodes_type, edge_types):


# Record one interaction from `from_user` to `to_user` in graph `g`: add both
# user nodes, then bump the overall "weight" counter and the `edge_type`
# counter stored on the edge between them.
# NOTE(review): this listing is a rendered commit diff, so removed and added
# lines appear back to back (the two signature tails, the paired node keys and
# the paired edge lookups). The lines using id_as_label / from_label / to_label
# are the added ones.
def add_user_edge(g, from_user, from_user_id, to_user, to_user_id, edge_type,
created_at, edge_types):
created_at, edge_types, id_as_label):

# Storing start_date gives Gephi's timeline a timestamp for each node: the
# node appears on screen at its start date and stays visible from then on.
# NOTE(review): presumably add_node on an already-present node updates its
# attributes, so start_date would follow the most recent call rather than the
# first one -- confirm against the README's "first interaction" wording.

# Choose the node key: the user id when id_as_label is set, otherwise the
# screen name. Both values are still stored as node attributes either way.
if id_as_label:
from_label = from_user_id
to_label = to_user_id
else:
from_label = from_user
to_label = to_user
g.add_node(
from_user,
from_label,
screen_name=from_user,
user_id=from_user_id,
start_date=created_at,
)
g.add_node(
to_user,
to_label,
screen_name=to_user,
user_id=to_user_id,
start_date=created_at,
)

# Reuse the counters of an existing edge; otherwise create the edge and start
# from zero for "weight" and for every entry in edge_types.
if g.has_edge(from_user, to_user):
weights = g[from_user][to_user]
if g.has_edge(from_label, to_label):
weights = g[from_label][to_label]
else:
g.add_edge(from_user, to_user)
g.add_edge(from_label, to_label)
weights = {t: 0 for t in ("weight", ) + edge_types}
# "weight" counts every interaction; the edge_type entry counts this kind only.
# NOTE(review): edge_type must be one of edge_types for a freshly created edge,
# otherwise the second increment raises KeyError -- verify against the callers.
weights["weight"] += 1
weights[edge_type] += 1
g[from_user][to_user].update(weights)
g[from_label][to_label].update(weights)


def add_tweet_edge(g, from_user, from_user_id, from_id, to_user, to_user_id,
Expand Down

0 comments on commit 296ac4c

Please sign in to comment.