Skip to content

Commit

Permalink
more flake8 happine8
Browse files Browse the repository at this point in the history
  • Loading branch information
vedina committed Nov 25, 2024
1 parent f061f07 commit 34a93b9
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 81 deletions.
5 changes: 3 additions & 2 deletions src/rcapi/models/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pydantic import BaseModel
from typing import Dict, Optional, Union
from typing import Dict, Optional


class Task(BaseModel):
Expand All @@ -15,4 +15,5 @@ class Task(BaseModel):
result_uuid: Optional[str] = None
errorCause: Optional[str] = None

tasks_db: Dict[str, Task] = {}

tasks_db: Dict[str, Task] = {}
162 changes: 89 additions & 73 deletions src/rcapi/services/convertor_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,84 +5,91 @@
import base64
import numpy as np
from numcompress import compress
import h5py, h5pyd
from rcapi.services.solr_query import solr_query_get,SOLR_VECTOR
import h5py
import h5pyd
from rcapi.services.solr_query import solr_query_get, SOLR_VECTOR
import traceback
import tempfile
import shutil
from ramanchada2.spectrum import from_local_file
import os
import hashlib
from typing import Tuple
import ramanchada2 as rc2
import ramanchada2 as rc2
import numpy.typing as npt
import os
from scipy.interpolate import Akima1DInterpolator

# Common x grid shared by the search helpers in this module:
# 2048 evenly spaced points from 140 to 3*1024+140.
x4search = np.linspace(140, 3*1024+140, num=2048)


def empty_figure(figsize, title, label) -> Figure:
    """Build a placeholder figure that shows only a title and a text label.

    Args:
        figsize: figure size, passed unchanged to ``Figure``.
        title: text set as the axes title (font size 8).
        label: text annotated at data position (0, 0).

    Returns:
        A ``Figure`` with a single axes whose frame and ticks are hidden.
    """
    placeholder = Figure(figsize=figsize)
    ax = placeholder.add_subplot(1, 1, 1)
    ax.axis('off')
    # Zero-size markers: nothing is drawn, but the two points are still added
    # to the axes (presumably to pin the data limits around the label).
    ax.scatter([0, 1], [0, 0], s=0)
    ax.annotate(label, (0, 0))
    ax.title.set_fontsize(8)
    ax.set_title(title)
    return placeholder

def dict2figure(pm, figsize) -> Figure:
    """Render the string-valued fields of a record as coloured text lines.

    Every entry of ``pm`` whose value is a string is written as one
    ``"<key>: <value>"`` line, stacked bottom-up in iteration order. The keys
    ``"domain"``, ``"id"`` and ``"score"`` are never listed; ``pm["domain"]``
    is used as the figure title instead.

    Args:
        pm: mapping with the fields to show; must contain ``"domain"``.
        figsize: figure size, passed unchanged to ``Figure``.

    Returns:
        A ``Figure`` with a single axes (frame hidden) holding the text lines.

    Raises:
        KeyError: if ``pm`` has no ``"domain"`` entry.
    """
    skipped = ("domain", "id", "score")
    # Values with a known meaning get their own colour; anything else is black.
    palette = {"high": "r", "medium": "y", "low": "g",
               "fulfilled": "b", "not fulfilled": "c"}
    # Select the printable entries once so that the number of rows and the
    # rows themselves cannot drift apart.
    rows = [(key, pm[key]) for key in pm
            if key not in skipped and isinstance(pm[key], str)]

    fig = Figure(figsize=figsize)
    axis = fig.add_subplot(1, 1, 1)
    # Zero-size markers at (0, 0) .. (0, n-1): nothing is drawn, but one point
    # per text row is added to the axes.
    axis.scatter(np.repeat(0, len(rows)), np.arange(0, len(rows)), s=0)
    axis.axis('off')
    for row, (key, value) in enumerate(rows):
        # Strip only a trailing "_s"; an "_s" inside the key (e.g.
        # "data_source_s") must be kept intact.
        shown_key = key[:-2] if key.endswith("_s") else key
        axis.annotate("{}: {}".format(shown_key, value), (0, row),
                      color=palette.get(value, "k"))
    axis.title.set_fontsize(8)
    axis.set_xlim(0, 1)
    axis.set_title(pm["domain"])
    return fig


def knnquery(domain,dataset="raw"):
def knnquery(domain, dataset="raw"):
try:
with h5pyd.File(domain,mode="r") as h5:
with h5pyd.File(domain, mode="r") as h5:
x = h5[dataset][0]
y = h5[dataset][1]
spe = rc2.spectrum.Spectrum(x,y)
spe_processed = preprocess_spectrum(spe,x4search,baseline=False)
spe = rc2.spectrum.Spectrum(x, y)
spe_processed = preprocess_spectrum(spe, x4search, baseline=False)
result_json = {}
result_json["cdf"] = compress(spe_processed.y.tolist(),precision=6)
#result_json["pdf"] = compress(pdf.tolist(),precision=6)
#return ','.join(map(str, cdf))
result_json["cdf"] = compress(spe_processed.y.tolist(), precision=6)
# result_json["pdf"] = compress(pdf.tolist(),precision=6)
# return ','.join(map(str, cdf))
try:
px = 1/plt.rcParams['figure.dpi'] # pixel in inches
fig = plot_spectrum(x,y,"query",h5[dataset].dims[0].label,h5[dataset].dims[1].label,figsize=(300*px, 200*px),
thumbnail=True,plot_kwargs={'color': 'green'})
fig = plot_spectrum(x, y, "query",
h5[dataset].dims[0]. label,
h5[dataset].dims[1].label,
figsize=(300*px, 200*px),
thumbnail=True,
plot_kwargs={'color': 'green'})
output = BytesIO()
FigureCanvas(fig).print_png(output)
base64_bytes = base64.b64encode(output.getvalue())
Expand All @@ -91,20 +98,21 @@ def knnquery(domain,dataset="raw"):
print(err)
return result_json
except Exception as err:
raise(err)
raise err


def plot_spectrum(x,y,title=None,xlabel=None,ylabel=None,thumbnail=True,figsize=None,plot_kwargs=None):
def plot_spectrum(x, y, title=None, xlabel=None, ylabel=None, thumbnail=True, figsize=None, plot_kwargs=None):
if figsize is None:
figsize=(6,4)
figsize = (6, 4)
if xlabel is None:
xlabel = r'wavenumber [$\mathrm{cm}^{-1}$]'
if ylabel is None:
ylabel = "intensity [a.u.]"
fig = Figure(figsize=figsize,constrained_layout=True)
fig = Figure(figsize=figsize, constrained_layout=True)
if plot_kwargs is None:
plot_kwargs = {}
axis = fig.add_subplot(1, 1, 1)
axis.plot(x, y,**plot_kwargs)
axis.plot(x, y, **plot_kwargs)
axis.set_xlabel(xlabel)
plt.subplots_adjust(bottom=0.1)
if not thumbnail:
Expand All @@ -116,80 +124,86 @@ def plot_spectrum(x,y,title=None,xlabel=None,ylabel=None,thumbnail=True,figsize=
return fig


def resample_spline(spe: rc2.spectrum.Spectrum, x4search: npt.NDArray):
    """Resample a spectrum onto ``x4search`` with Akima interpolation.

    Grid points outside ``[spe.x.min(), spe.x.max()]`` are not extrapolated;
    their intensity is set to 0.

    Args:
        spe: spectrum to resample; ``spe.x`` / ``spe.y`` feed the interpolator.
        x4search: target x grid.

    Returns:
        A new spectrum whose ``x`` is ``x4search`` and whose ``y`` holds the
        interpolated intensities, so both axes have the same length.
    """
    spline = Akima1DInterpolator(spe.x, spe.y)
    # Float buffer even for an integer grid, so interpolated values are not
    # truncated.
    resampled = np.zeros_like(x4search, dtype=float)
    xmin, xmax = spe.x.min(), spe.x.max()
    within_range = (x4search >= xmin) & (x4search <= xmax)
    resampled[within_range] = spline(x4search[within_range])
    # The intensities live on the target grid, so that grid (not the source
    # axis) must be returned as x.
    return rc2.spectrum.Spectrum(x=x4search, y=resampled)

def preprocess_spectrum(spe: rc2.spectrum.Spectrum, x4search: npt.NDArray, baseline=False):
    """Prepare a spectrum for vector search.

    Steps: subtract the minimum intensity (pedestal), resample onto
    ``x4search`` with ``resample_spline``, optionally subtract a SNIP
    baseline (``niter=40``), then scale the intensities to unit L2 norm.

    Args:
        spe: input spectrum.
        x4search: target x grid.
        baseline: when true, run ``subtract_baseline_rc1_snip`` on the
            resampled spectrum before normalising.

    Returns:
        A new spectrum on ``x4search``. The intensities have unit L2 norm,
        unless the norm is zero, in which case they are returned unscaled.
    """
    spe_nopedestal = rc2.spectrum.Spectrum(x=spe.x, y=spe.y - np.min(spe.y))
    spe_resampled = resample_spline(spe_nopedestal, x4search)
    # baseline
    if baseline:
        spe_resampled = spe_resampled.subtract_baseline_rc1_snip(niter=40)
    # L2 norm for searching
    intensities = spe_resampled.y
    l2_norm = np.linalg.norm(intensities)
    # A constant spectrum, or one that does not overlap the grid, resamples to
    # all zeros; dividing by that zero norm would yield NaN.
    if l2_norm > 0:
        intensities = intensities / l2_norm
    return rc2.spectrum.Spectrum(x4search, intensities)


def generate_etag(content: str) -> str:
    """Return the hexadecimal MD5 digest of ``content`` for use as an ETag.

    The digest is only a change fingerprint, not a security measure, so MD5
    is requested with ``usedforsecurity=False``; this keeps the call usable on
    builds where MD5 is blocked for security purposes.

    Args:
        content: text to fingerprint; encoded with the default ``str.encode``
            encoding (UTF-8).

    Returns:
        A 32-character lowercase hex string.
    """
    return hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()

async def solr2image(solr_url: str, domain: str, figsize=(6, 4),
                     extraprm=None, thumbnail: bool = True,
                     token: str = None) -> Tuple[Figure, str]:
    """Plot the stored search vector of the study document matching ``domain``.

    Queries Solr for ``type_s:study`` documents whose ``textValue_s`` equals
    ``domain`` and plots the first document that carries a ``SOLR_VECTOR``
    field against ``x4search``.

    Args:
        solr_url: passed to ``solr_query_get``.
        domain: value matched against ``textValue_s``; its last ``/`` segment
            labels the placeholder figures.
        figsize: figure size for the plot or the placeholder.
        extraprm: optional prefix for the title of non-thumbnail plots.
        thumbnail: when true, the plot gets no title.
        token: passed to ``solr_query_get``.

    Returns:
        ``(figure, etag)``. ``etag`` is derived from the document's
        ``textValue_s``, ``updated_s`` and ``_version_``. When nothing can be
        plotted (no response, non-200 status, no hit, no document with a
        vector, or any exception) the figure is a placeholder describing the
        problem and ``etag`` is ``None``.
    """
    short_name = "{}".format(domain.split("/")[-1])
    rs = None
    try:
        # Keep the domain inside the quoted phrase: escape backslashes and
        # double quotes, which would otherwise terminate or alter the query.
        escaped = domain.replace("\\", "\\\\").replace('"', '\\"')
        query = 'textValue_s:"{}"'.format(escaped)
        params = {"q": query, "fq": ["type_s:study"],
                  "fl": "name_s,textValue_s,reference_s,reference_owner_s,{},updated_s,_version_".format(SOLR_VECTOR)}
        rs = await solr_query_get(solr_url, params, token=token)
        if rs is None:
            return empty_figure(figsize, title="no response", label=short_name), None
        if rs.status_code == 200:
            response_json = rs.json()
            if "response" in response_json:
                response = response_json["response"]
                if response["numFound"] == 0:
                    return empty_figure(figsize, title="not found", label=short_name), None
                for doc in response["docs"]:
                    # A document may lack the vector field altogether; skip
                    # it instead of failing the whole request.
                    y = doc.get(SOLR_VECTOR)
                    if y is None:
                        continue
                    _title = None if thumbnail else "{} {} {} ({})".format(
                        "" if extraprm is None else extraprm,
                        doc["name_s"],
                        doc["reference_owner_s"], doc["reference_s"])
                    fig = plot_spectrum(x4search, y, _title,
                                        r'wavenumber [$\mathrm{cm}^{-1}$]', "intensity [a.u.]",
                                        figsize=figsize, thumbnail=thumbnail)
                    etag = generate_etag("{}{}{}".format(
                        doc["textValue_s"],
                        doc.get("updated_s", ""), doc.get("_version_", "")))
                    return fig, etag
        # Non-200 status, or a 200 response without a plottable document.
        # NOTE(review): `aclose()` suggests an httpx response, which exposes
        # `reason_phrase` rather than `reason` - confirm against
        # solr_query_get; both spellings are accepted here.
        reason = getattr(rs, "reason_phrase", None) or getattr(rs, "reason", "")
        return empty_figure(figsize, "{} {}".format(rs.status_code, reason), short_name), None
    except Exception as err:
        print(traceback.format_exc())
        return empty_figure(figsize, title="{}".format(err), label=short_name), None
    finally:
        if rs is not None:
            await rs.aclose()


def recursive_copy(
src_group: h5py.Group | h5pyd.Group, dst_group: h5py.Group | h5pyd.Group, level=0
src_group: h5py.Group | h5pyd.Group, dst_group: h5py.Group | h5pyd.Group,
level=0
):
# every File instance is also an HDF5 group
# Copy attributes of the current group
for attr_name, attr_value in src_group.attrs.items():
dst_group.attrs[attr_name] = attr_value
for index,key in enumerate(src_group):
for index, key in enumerate(src_group):
try:
item = src_group[key]
if isinstance(item, (h5py.Group, h5pyd.Group)):
# Create the group in the destination file
new_group = dst_group.create_group(key)
recursive_copy(item, new_group,level+1)
recursive_copy(item, new_group, level+1)
elif isinstance(item, (h5py.Dataset, h5pyd.Dataset)):
if item.shape == (): # Scalar dataset
# Copy the scalar value directly
Expand All @@ -198,24 +212,26 @@ def recursive_copy(
# Copy the dataset to the destination file
dst_dataset = dst_group.create_dataset(key, data=item[:])
for attr_name, attr_value in item.attrs.items():
dst_dataset.attrs[attr_name] = attr_value
#dst_dataset.flush()
except Exception as err:
dst_dataset.attrs[attr_name] = attr_value
# dst_dataset.flush()
except Exception:
print(traceback.format_exc())


def read_spectrum_native(file, file_name, prefix="rcapi_"):
    """Read a spectrum from a file-like object via a temporary file.

    ``from_local_file`` works with file paths only (no URLs or file objects),
    so the content of ``file`` is first copied to a named temporary file that
    keeps the extension of ``file_name``. The temporary file is always removed
    afterwards, whether reading succeeds or fails.

    Args:
        file: readable file-like object with the spectrum content.
        file_name: original file name; only its extension is used (as the
            temporary file's suffix).
        prefix: prefix of the temporary file name.

    Returns:
        Whatever ``from_local_file`` returns for the temporary copy.

    Raises:
        Any exception raised while copying or reading propagates unchanged.
    """
    _, file_extension = os.path.splitext(file_name)
    native_filename = None
    try:
        with tempfile.NamedTemporaryFile(
                delete=False,
                prefix=prefix, suffix=file_extension) as tmp:
            # Record the path before copying, so a failed copy still gets
            # cleaned up in `finally`.
            native_filename = tmp.name
            shutil.copyfileobj(file, tmp)
        # Read only after the `with` block has closed (and flushed) the file.
        return from_local_file(native_filename)
    finally:
        if native_filename is not None:
            os.remove(native_filename)
2 changes: 1 addition & 1 deletion tests/test_api_hsdsdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

@pytest.fixture(scope="module")
def domain():
params = { "query_type": "metadata", "pagesize": 1}
params = {"query_type": "metadata", "pagesize": 1}
response = client.get("/db/query", params=params)
assert response.status_code == 200
_domain = response.json()[0]["value"]
Expand Down
9 changes: 5 additions & 4 deletions tests/test_api_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

TEST_ENDPOINT = "/db/query"


@pytest.fixture
def knnquery4test():
resource_path = files('resources.api',).joinpath('pdf2knnquery.txt')
Expand All @@ -30,7 +31,7 @@ def test_query_metadata():


def test_query_metadata_embeddedimages():
params = { "query_type": "metadata" , "img": "embedded"}
params = {"query_type": "metadata" , "img": "embedded"}
response = client.get(TEST_ENDPOINT, params=params)
assert response.status_code == 200
result = response.json()
Expand All @@ -40,11 +41,11 @@ def test_query_metadata_embeddedimages():
assert "value" in item, "'value' key missing"
assert "text" in item, "'text' key missing"
assert "imageLink" in item, "'imageLink' key missing"
#assert "spectrum_p1024" in item, "vector field key missing"
# assert "spectrum_p1024" in item, "vector field key missing"


def test_knnquery(knnquery4test):
params = { "query_type": "knnquery" , "ann": knnquery4test}
params = {"query_type": "knnquery", "ann": knnquery4test}
response = client.get(TEST_ENDPOINT, params=params)
assert response.status_code == 200
result = response.json()
Expand All @@ -55,7 +56,7 @@ def test_knnquery(knnquery4test):
assert "value" in item, "'value' key missing"
assert "text" in item, "'text' key missing"
assert "imageLink" in item, "'imageLink' key missing"
#assert SOLR_VECTOR in item, "vector field key missing"
# assert SOLR_VECTOR in item, "vector field key missing"


def test_fixture(knnquery4test):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_api_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def setup_template_dir(config_dict):
for TEST_JSON_PATH, TEMPLATE_UUID in _TEMPLATES:
print(TEST_JSON_PATH)
file_path = os.path.join(TEMPLATE_DIR, "{}.json".format(TEMPLATE_UUID))
shutil.copy(TEST_JSON_PATH, file_path )
shutil.copy(TEST_JSON_PATH, file_path)
new_modified_date = datetime.now(tz.utc) - timedelta(hours=24)
timestamp = new_modified_date.timestamp()
os.utime(file_path, times=(timestamp, timestamp))
Expand Down

0 comments on commit 34a93b9

Please sign in to comment.