Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: covidgraph/graph-processing_fragmentize_text
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 0.0.7
Choose a base ref
...
head repository: covidgraph/graph-processing_fragmentize_text
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
  • 16 commits
  • 6 files changed
  • 2 contributors

Commits on Apr 23, 2020

  1. batch NEXT relationships

    Martin committed Apr 23, 2020
    Copy the full SHA
    28c3a7b View commit details
  2. fix index bug

    Martin committed Apr 23, 2020
    Copy the full SHA
    6b1ddea View commit details
  3. fix query string bug

    Martin committed Apr 23, 2020
    Copy the full SHA
    e3b8195 View commit details
  4. fix brackets in query string

    Martin committed Apr 23, 2020
    Copy the full SHA
    a669af6 View commit details
  5. test function

    Martin committed Apr 23, 2020
    Copy the full SHA
    ddf2159 View commit details
  6. fix bug in link query

    Martin committed Apr 23, 2020
    Copy the full SHA
    5e70373 View commit details

Commits on Apr 27, 2020

  1. Copy the full SHA
    09539cb View commit details
  2. Copy the full SHA
    30207d2 View commit details

Commits on Apr 28, 2020

  1. Merge pull request #2 from frankschmitt/typos-fix-1

    Small doc improvements
    mpreusse authored Apr 28, 2020
    Copy the full SHA
    eb46d9d View commit details

Commits on Apr 29, 2020

  1. decrease batch size for fragment links

    Martin committed Apr 29, 2020
    Copy the full SHA
    2471f4b View commit details
  2. Merge branch 'master' of github.com:covidgraph/graph-processing_fragm…

    …entize_text
    Martin committed Apr 29, 2020
    Copy the full SHA
    6b1a6c6 View commit details

Commits on Jul 7, 2020

  1. reduce batch size for NEXT relationships

    Martin committed Jul 7, 2020
    Copy the full SHA
    ef650ad View commit details
  2. down to 5

    Martin committed Jul 7, 2020
    Copy the full SHA
    11574d4 View commit details

Commits on Jul 27, 2020

  1. do not create NEXT relationships

    Martin committed Jul 27, 2020
    Copy the full SHA
    ce0dfee View commit details

Commits on Nov 18, 2020

  1. work with new NEO4J config

    Martin committed Nov 18, 2020
    Copy the full SHA
    431f021 View commit details
  2. Copy the full SHA
    2cbce19 View commit details
Showing with 52 additions and 23 deletions.
  1. +2 −2 .gitignore
  2. +1 −1 Dockerfile
  3. +9 −2 README.md
  4. +1 −3 docker-compose.yml
  5. +31 −15 run.py
  6. +8 −0 test_run.py
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

.env
# Created by https://www.gitignore.io/api/python
# Edit at https://www.gitignore.io/?templates=python

@@ -119,7 +119,7 @@ dmypy.json
.LSOverride

# Icon must end with two \r
Icon
Icon

# Thumbnails
._*
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3
FROM python:3.8

RUN mkdir /src
RUN mkdir /download
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
# create :Fragement nodes for publications
# Requirements
## Client
py2neo

## Server
The Neo4J server needs to have APOC installed. Just grab the version you need from https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/ and put it in the plugins/ directory of your Neo4J installation.

# create :Fragment nodes for publications

## Body_text
```
@@ -51,4 +58,4 @@ WHERE f.sequence > 0
MATCH (f)<--(n)-->(f2:TestFragment:FromAbstract)
WHERE f2.sequence = f.sequence - 1
MERGE (f2)-[:NEXT]->(f)
```
```
4 changes: 1 addition & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -4,6 +4,4 @@ services:
run_graph-processing_fragmentize_text:
build: .
environment:
- GC_NEO4J_URL=bolt://host.docker.internal:7687
- GC_NEO4J_USER=neo4j
- GC_NEO4J_PASSWORD=test
- NEO4J=${NEO4J}
46 changes: 31 additions & 15 deletions run.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import logging
import py2neo
import json

logging.basicConfig(level=logging.DEBUG)
logging.getLogger('py2neo.connect.bolt').setLevel(logging.WARNING)
@@ -10,13 +11,20 @@

log = logging.getLogger(__name__)

GC_NEO4J_URL = os.getenv('GC_NEO4J_URL', 'bolt://localhost:7687')
GC_NEO4J_USER = os.getenv('GC_NEO4J_USER', 'neo4j')
GC_NEO4J_PASSWORD = os.getenv('GC_NEO4J_PASSWORD', 'test')
RUN_MODE = os.getenv('RUN_MODE', 'prod')
NEO4J_CONFIG_STRING = os.getenv("NEO4J")

try:
log.info(NEO4J_CONFIG_STRING)
NEO4J_CONFIG_DICT = json.loads(NEO4J_CONFIG_STRING)
except json.decoder.JSONDecodeError:
# try to replace single quotes with double quotes
# JSON always expects double quotes, common mistake when writing JSON strings
NEO4J_CONFIG_STRING = NEO4J_CONFIG_STRING.replace("'", '"')
log.info(NEO4J_CONFIG_STRING)
NEO4J_CONFIG_DICT = json.loads(NEO4J_CONFIG_STRING)

for v in [GC_NEO4J_URL, GC_NEO4J_USER, GC_NEO4J_PASSWORD]:
log.debug(v)
log.info(f'dict {NEO4J_CONFIG_DICT}')
RUN_MODE = os.getenv('RUN_MODE', 'prod')


def create_query_fragments_for_node(label, text_property):
@@ -27,7 +35,7 @@ def create_query_fragments_for_node(label, text_property):
:param text_property: Name of the property containing the text
:return: Query to create :Fragment nodes
"""
log.debug("Create qeuery for label {}, text property {}".format(label, text_property))
log.debug("Create query for label {}, text property {}".format(label, text_property))

q = """CALL apoc.periodic.iterate(
\"MATCH (text_node:{0}) WHERE NOT text_node:CollectionHub AND NOT (text_node)-[:HAS_FRAGMENT]-() RETURN text_node\",
@@ -54,11 +62,13 @@ def create_query_link_fragments(label):
"""
log.debug("Create query to link fragments from {}".format(label))

q = """MATCH (f:Fragment:{0})
WHERE f.sequence > 0
MATCH (f)<--(n)-->(f2:Fragment:From{0})
WHERE f2.sequence = f.sequence - 1
MERGE (f2)-[:NEXT]->(f)""".format(label)
q = """CALL apoc.periodic.iterate(
\"MATCH (f:Fragment:From{0}) WHERE f.sequence > 0 RETURN f\",
\"MATCH (f)<--(n)-->(f2:Fragment:From{0})
WHERE f2.sequence = f.sequence - 1
MERGE (f2)-[:NEXT]->(f)\",
{{batchSize: 5, iterateList: true, parallel: true}}
)""".format(label)

log.debug(q)
return q
@@ -68,9 +78,15 @@ def create_query_link_fragments(label):
if RUN_MODE.lower() == 'test':
log.info("Run tests")
else:
graph = py2neo.Graph(GC_NEO4J_URL, user=GC_NEO4J_USER, password=GC_NEO4J_PASSWORD)
graph = py2neo.Graph(**NEO4J_CONFIG_DICT)
log.debug(graph)

# create index
try:
graph.run("CREATE INDEX ON :Fragment(sequence)")
except py2neo.database.work.ClientError:
log.info("Create index on :Fragment(sequence) fails, likely Neo4j 4 which throws error when trying to create existing index.")

# create fragments for Body_text
log.debug("Create fragments for BodyText and Abstract")

@@ -87,5 +103,5 @@ def create_query_link_fragments(label):
query_fragments = create_query_fragments_for_node(label, prop)
graph.run(query_fragments)

query_link_fragments = create_query_link_fragments(label)
graph.run(query_link_fragments)
#query_link_fragments = create_query_link_fragments(label)
#graph.run(query_link_fragments)
8 changes: 8 additions & 0 deletions test_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from run import create_query_link_fragments


def test_query_link_fragments():
    """Check that the fragment-link query is built for the requested label.

    A bare truthiness check passes for any non-empty string, so it cannot
    catch mistakes in the ``str.format`` template (wrong placeholder,
    unescaped braces). These assertions pin the parts of the query that
    depend on the label and on the template being formatted completely.
    """
    label = 'BodyText'

    q = create_query_link_fragments(label)

    # The builder must return a non-empty query string.
    assert q
    # The label is interpolated into the ``From<label>`` fragment label.
    assert 'From{}'.format(label) in q
    # No positional placeholder may survive formatting.
    assert '{0}' not in q
    # The query links consecutive fragments with a NEXT relationship.
    assert '[:NEXT]' in q