Skip to content

Commit

Permalink
Merge pull request #9 from bento-platform/releases/v2.3.0
Browse files Browse the repository at this point in the history
releases/v2.3.0
  • Loading branch information
brouillette authored Sep 20, 2021
2 parents f49ee9c + bdd3f3a commit f2959a8
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 22 deletions.
8 changes: 7 additions & 1 deletion src/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,27 +122,33 @@ func main() {
// middleware
gam.MandateChromosomeAttribute,
gam.MandateCalibratedBounds,
gam.MandateAssemblyIdAttribute,
gam.ValidatePotentialGenotypeQueryParameter)
e.GET("/variants/get/by/sampleId", mvc.VariantsGetBySampleId,
// middleware
gam.MandateChromosomeAttribute,
gam.MandateCalibratedBounds,
gam.MandateAssemblyIdAttribute,
gam.MandateSampleIdsPluralAttribute,
gam.ValidatePotentialGenotypeQueryParameter)

e.GET("/variants/count/by/variantId", mvc.VariantsCountByVariantId,
// middleware
gam.MandateChromosomeAttribute,
gam.MandateCalibratedBounds,
gam.MandateAssemblyIdAttribute,
gam.ValidatePotentialGenotypeQueryParameter)
e.GET("/variants/count/by/sampleId", mvc.VariantsCountBySampleId,
// middleware
gam.MandateChromosomeAttribute,
gam.MandateCalibratedBounds,
gam.MandateAssemblyIdAttribute,
gam.MandateSampleIdsSingularAttribute,
gam.ValidatePotentialGenotypeQueryParameter)

e.GET("/variants/ingestion/run", mvc.VariantsIngest)
e.GET("/variants/ingestion/run", mvc.VariantsIngest,
// middleware
gam.MandateAssemblyIdAttribute)
e.GET("/variants/ingestion/requests", mvc.GetAllVariantIngestionRequests)

// Run
Expand Down
24 changes: 24 additions & 0 deletions src/api/middleware/assemblyMiddleware.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package middleware

import (
assid "api/models/constants/assembly-id"
"net/http"

"github.com/labstack/echo"
)

/*
Echo middleware to ensure a valid `assemblyId` HTTP query parameter was provided
*/
func MandateAssemblyIdAttribute(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
// check for assemblyId query parameter
assemblyId := c.QueryParam("assemblyId")
if len(assemblyId) == 0 || !assid.IsKnownAssemblyId(assemblyId) {
// if no id was provided, or it was invalid, return an error
return echo.NewHTTPError(http.StatusBadRequest, "Missing or unknown assemblyId!")
}

return next(c)
}
}
36 changes: 36 additions & 0 deletions src/api/models/constants/assembly-id/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package assemblyId

import (
"api/models/constants"
"strings"
)

const (
Unknown constants.AssemblyId = "Unknown"

GRCh38 constants.AssemblyId = "GRCh38"
GRCh37 constants.AssemblyId = "GRCh37"
NCBI36 constants.AssemblyId = "NCBI36"
Other constants.AssemblyId = "Other"
)

func CastToAssemblyId(text string) constants.AssemblyId {
switch strings.ToLower(text) {
case "grch38":
return GRCh38
case "grch37":
return GRCh37
case "ncbi36":
return NCBI36
case "other":
return Other
default:
return Unknown
}
}

func IsKnownAssemblyId(text string) bool {
// attempt to cast to assemblyId and
// return if unknown assemblyId
return CastToAssemblyId(text) != Unknown
}
1 change: 1 addition & 0 deletions src/api/models/constants/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package constants
throughout Gohan and it's
associated services.
*/
type AssemblyId string
type GenotypeQuery string
type SearchOperation string
type SortDirection string
Expand Down
4 changes: 3 additions & 1 deletion src/api/models/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ type Variant struct {
Info []Info `json:"info"`

Samples []Sample `json:"samples"`
FileId string `json:"fileId"`

FileId string `json:"fileId"`
AssemblyId c.AssemblyId `json:"assemblyId"`
}

type Info struct {
Expand Down
31 changes: 22 additions & 9 deletions src/api/mvc/variants.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"api/contexts"
"api/models"
"api/models/constants"
a "api/models/constants/assembly-id"
gq "api/models/constants/genotype-query"
s "api/models/constants/sort"
"api/models/ingest"
Expand Down Expand Up @@ -93,6 +94,8 @@ func VariantsIngest(c echo.Context) error {
}
}

assemblyId := a.CastToAssemblyId(c.QueryParam("assemblyId"))

startTime := time.Now()

fmt.Printf("Ingest Start: %s\n", startTime)
Expand Down Expand Up @@ -214,7 +217,7 @@ func VariantsIngest(c echo.Context) error {
}

// --- load back into memory and process
ingestionService.ProcessVcf(vcfFilePath, drsFileId)
ingestionService.ProcessVcf(vcfFilePath, drsFileId, assemblyId)

// --- delete the temporary vcf file
os.Remove(vcfFilePath)
Expand Down Expand Up @@ -287,6 +290,10 @@ func GetVariantsOverview(c echo.Context) error {
wg.Add(1)
go callGetBucketsByKeyword("sampleIDs", "samples.id.keyword", &wg)

// get distribution of assembly IDs
wg.Add(1)
go callGetBucketsByKeyword("assemblyIDs", "assemblyId.keyword", &wg)

wg.Wait()

return c.JSON(http.StatusOK, resultsMap)
Expand All @@ -305,7 +312,7 @@ func GetAllVariantIngestionRequests(c echo.Context) error {

func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error {

var es, chromosome, lowerBound, upperBound, reference, alternative, genotype = retrieveCommonElements(c)
var es, chromosome, lowerBound, upperBound, reference, alternative, genotype, assemblyId = retrieveCommonElements(c)

// retrieve other query parameters relevent to this 'get' query ---
sizeQP := c.QueryParam("size")
Expand Down Expand Up @@ -363,7 +370,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error
_id, "", // note : "" is for sampleId
reference, alternative,
size, sortByPosition,
includeSamplesInResultSet, genotype)
includeSamplesInResultSet, genotype, assemblyId)
} else {
// implied sampleId query
variantRespDataModel.SampleId = _id
Expand All @@ -375,7 +382,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error
"", _id, // note : "" is for variantId
reference, alternative,
size, sortByPosition,
includeSamplesInResultSet, genotype)
includeSamplesInResultSet, genotype, assemblyId)
}

// query for each id
Expand Down Expand Up @@ -420,7 +427,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error

func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) error {

var es, chromosome, lowerBound, upperBound, reference, alternative, genotype = retrieveCommonElements(c)
var es, chromosome, lowerBound, upperBound, reference, alternative, genotype, assemblyId = retrieveCommonElements(c)

respDTO := models.VariantsResponseDTO{}
respDTOMux := sync.RWMutex{}
Expand All @@ -444,7 +451,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro
docs = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(es,
chromosome, lowerBound, upperBound,
_id, "", // note : "" is for sampleId
reference, alternative, genotype)
reference, alternative, genotype, assemblyId)
} else {
// implied sampleId query
variantRespDataModel.SampleId = _id
Expand All @@ -454,7 +461,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro
docs = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(es,
chromosome, lowerBound, upperBound,
"", _id, // note : "" is for variantId
reference, alternative, genotype)
reference, alternative, genotype, assemblyId)
}

variantRespDataModel.Count = int(docs["count"].(float64))
Expand All @@ -474,7 +481,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro
return c.JSON(http.StatusOK, respDTO)
}

func retrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, constants.GenotypeQuery) {
func retrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, constants.GenotypeQuery, constants.AssemblyId) {
es := c.(*contexts.GohanContext).Es7Client

chromosome := c.QueryParam("chromosome")
Expand Down Expand Up @@ -519,5 +526,11 @@ func retrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int,
}
}

return es, chromosome, lowerBound, upperBound, reference, alternative, genotype
assemblyId := a.Unknown
assemblyIdQP := c.QueryParam("assemblyId")
if len(assemblyIdQP) > 0 && a.IsKnownAssemblyId(assemblyIdQP) {
assemblyId = a.CastToAssemblyId(assemblyIdQP)
}

return es, chromosome, lowerBound, upperBound, reference, alternative, genotype, assemblyId
}
25 changes: 23 additions & 2 deletions src/api/repositories/elasticsearch/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(es *elasticsearch.Cli
variantId string, sampleId string,
reference string, alternative string,
size int, sortByPosition c.SortDirection,
includeSamplesInResultSet bool, genotype c.GenotypeQuery) map[string]interface{} {
includeSamplesInResultSet bool,
genotype c.GenotypeQuery, assemblyId c.AssemblyId) map[string]interface{} {

// begin building the request body.
mustMap := []map[string]interface{}{{
Expand Down Expand Up @@ -60,6 +61,16 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(es *elasticsearch.Cli
}
}

if assemblyId != "" {
mustMap = append(mustMap, map[string]interface{}{
"match": map[string]interface{}{
"assemblyId": map[string]interface{}{
"query": assemblyId,
},
},
})
}

rangeMapSlice := []map[string]interface{}{}

// TODO: make upperbound and lowerbound nilable, somehow?
Expand Down Expand Up @@ -227,7 +238,8 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(es *elasticsearch.Cli
func CountDocumentsContainerVariantOrSampleIdInPositionRange(es *elasticsearch.Client,
chromosome string, lowerBound int, upperBound int,
variantId string, sampleId string,
reference string, alternative string, genotype c.GenotypeQuery) map[string]interface{} {
reference string, alternative string,
genotype c.GenotypeQuery, assemblyId c.AssemblyId) map[string]interface{} {

// begin building the request body.
mustMap := []map[string]interface{}{{
Expand Down Expand Up @@ -264,6 +276,15 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(es *elasticsearch.C
}
}

if assemblyId != "" {
mustMap = append(mustMap, map[string]interface{}{
"match": map[string]interface{}{
"assemblyId": map[string]interface{}{
"query": assemblyId,
},
},
})
}
rangeMapSlice := []map[string]interface{}{}

// TODO: make upperbound and lowerbound nilable, somehow?
Expand Down
3 changes: 2 additions & 1 deletion src/api/services/ingestion.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func (i *IngestionService) UploadVcfGzToDrs(gzippedFilePath string, gzipStream *
return id
}

func (i *IngestionService) ProcessVcf(vcfFilePath string, drsFileId string) {
func (i *IngestionService) ProcessVcf(vcfFilePath string, drsFileId string, assemblyId constants.AssemblyId) {
f, err := os.Open(vcfFilePath)
if err != nil {
fmt.Println("Failed to open file - ", err)
Expand Down Expand Up @@ -277,6 +277,7 @@ func (i *IngestionService) ProcessVcf(vcfFilePath string, drsFileId string) {
tmpVariantMapMutex := sync.RWMutex{}

tmpVariant["fileId"] = drsFileId
tmpVariant["assemblyId"] = assemblyId

var rowWg sync.WaitGroup
rowWg.Add(len(rowComponents))
Expand Down
19 changes: 11 additions & 8 deletions src/tests/integration/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package api
import (
"api/models"
c "api/models/constants"
a "api/models/constants/assembly-id"
gq "api/models/constants/genotype-query"
s "api/models/constants/sort"
z "api/models/constants/zygosity"
Expand Down Expand Up @@ -95,7 +96,6 @@ func TestCanGetVariantsWithoutSamplesInResultset(t *testing.T) {
for _, dtoResponse := range allDtoResponses {
firstDataPointResults := dtoResponse.Data[0].Results
assert.Nil(t, firstDataPointResults[0].Samples)

}
}

Expand Down Expand Up @@ -241,9 +241,9 @@ func runAndValidateGenotypeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQ
})
}

func buildQueryAndMakeGetVariantsCall(chromosome string, sampleId string, includeSamples bool, sortByPosition c.SortDirection, genotype string, _t *testing.T, _cfg *models.Config) models.VariantsResponseDTO {
func buildQueryAndMakeGetVariantsCall(chromosome string, sampleId string, includeSamples bool, sortByPosition c.SortDirection, genotype string, assemblyId c.AssemblyId, _t *testing.T, _cfg *models.Config) models.VariantsResponseDTO {

queryString := fmt.Sprintf("?chromosome=%s&ids=%s&includeSamplesInResultSet=%t&sortByPosition=%s&genotype=%s", chromosome, sampleId, includeSamples, sortByPosition, genotype)
queryString := fmt.Sprintf("?chromosome=%s&ids=%s&includeSamplesInResultSet=%t&sortByPosition=%s&genotype=%s&assemblyId=%s", chromosome, sampleId, includeSamples, sortByPosition, genotype, assemblyId)
url := fmt.Sprintf(VariantsGetBySampleIdsPathWithQueryString, _cfg.Api.Url, queryString)

return makeGetVariantsCall(url, _t)
Expand Down Expand Up @@ -290,12 +290,14 @@ func getVariantsOverview(_t *testing.T, _cfg *models.Config) map[string]interfac
return overviewRespJson
}

func getChromsAndSampleIDs(chromosomeStruct interface{}, sampleIdsStruct interface{}) [][]string {
func getOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct interface{}, assemblyIdsStruct interface{}) [][]string {
var allCombinations = [][]string{}

for i, _ := range chromosomeStruct.(map[string]interface{}) {
for j, _ := range sampleIdsStruct.(map[string]interface{}) {
allCombinations = append(allCombinations, []string{i, j})
for k, _ := range assemblyIdsStruct.(map[string]interface{}) {
allCombinations = append(allCombinations, []string{i, j, k})
}
}
}

Expand All @@ -309,21 +311,22 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc
overviewJson := getVariantsOverview(_t, cfg)
assert.NotNil(_t, overviewJson)

chromSampleIdCombinations := getChromsAndSampleIDs(overviewJson["chromosomes"], overviewJson["sampleIDs"])
overviewCombinations := getOverviewResultCombinations(overviewJson["chromosomes"], overviewJson["sampleIDs"], overviewJson["assemblyIDs"])

allDtoResponses := []models.VariantsResponseDTO{}
allDtoResponsesMux := sync.RWMutex{}

var combWg sync.WaitGroup
for _, combination := range chromSampleIdCombinations {
for _, combination := range overviewCombinations {
combWg.Add(1)
go func(_wg *sync.WaitGroup, _combination []string) {
defer _wg.Done()

chrom := _combination[0]
sampleId := _combination[1]
assemblyId := a.CastToAssemblyId(_combination[2])

dto := buildQueryAndMakeGetVariantsCall(chrom, sampleId, includeSamples, sortByPosition, genotype, _t, cfg)
dto := buildQueryAndMakeGetVariantsCall(chrom, sampleId, includeSamples, sortByPosition, genotype, assemblyId, _t, cfg)
assert.Equal(_t, 1, len(dto.Data))

allDtoResponsesMux.Lock()
Expand Down

0 comments on commit f2959a8

Please sign in to comment.