diff --git a/README.md b/README.md index 76e5c99d..17aca221 100644 --- a/README.md +++ b/README.md @@ -204,7 +204,7 @@ Response Requests >   **GET** `/variants/get/by/variantId`
>    params: -> - chromosome : **string** `( 1-23, X, Y, M )` +> - chromosome : **string** `( 1-23, X, Y, MT )` > - lowerBound : **number** > - upperBound : **number** > - reference : **string** `an allele ( "A" | "C" | "G" | "T" or some combination thereof )` @@ -214,10 +214,11 @@ Requests > - sortByPosition : **string** `( | asc | desc)` > - includeInfoInResultSet : **boolean** `(true | false)` > - genotype : **string** `( "HETEROZYGOUS" | "HOMOZYGOUS_REFERENCE" | "HOMOZYGOUS_ALTERNATE" )` +> - getSampleIdsOnly : **bool** *`(optional) - default: false `* > >   **GET** `/variants/count/by/variantId`
>    params: -> - chromosome : **string** `( 1-23, X, Y, M )` +> - chromosome : **string** `( 1-23, X, Y, MT )` > - lowerBound : **number** > - upperBound : **number** > - reference : **string** `an allele` @@ -227,7 +228,7 @@ Requests >   **GET** `/variants/get/by/sampleId`
>    params: -> - chromosome : **string** `( 1-23, X, Y, M )` +> - chromosome : **string** `( 1-23, X, Y, MT )` > - lowerBound : **number** > - upperBound : **number** > - reference : **string** `an allele` @@ -240,7 +241,7 @@ Requests > >   **GET** `/variants/count/by/sampleId`
>    params: -> - chromosome : **string** `( 1-23, X, Y, M )` +> - chromosome : **string** `( 1-23, X, Y, MT )` > - lowerBound : **number** > - upperBound : **number** > - reference : **string** `an allele` @@ -257,14 +258,18 @@ Generalized Response Body Structure >{ > "status": `number` (200 - 500), > "message": `string` ("Success" | "Error"), -> "data": [ +> "results": [ > { -> "variantId": `string`, -> "sampleId": `string`, -> "count": `number`, -> "results": [ +> "query": `string`, // reflective of the type of id queried for, i.e 'variantId:abc123', or 'sampleId:HG0001 +> "assemblyId": `string` ("GRCh38" | "GRCh37" | "NCBI36" | "Other"), // reflective of the assembly id queried for +> "count": `number`, // this field is only present when performing a COUNT query +> "start": `number`, // reflective of the provided lowerBound parameter, 0 if none +> "end": `number`, // reflective of the provided upperBound parameter, 0 if none +> "chromosome": `string`, // reflective of the chromosome queried for +> "calls": [ // this field is only present when performing a GET query > { -> "filter": `string`, +> "id": `string`, // variantId +> "chrom": `string`, > "pos": `number`, > "ref": `[]string`, // list of alleles > "alt": `[]string`, // list of alleles @@ -277,21 +282,9 @@ Generalized Response Body Structure > ], > "format":`string`, > "qual": `number`, -> "id": `string`, -> "sample": { -> "id": `string`, -> "variation": { -> "genotype": { -> "phased": `boolean`, -> "alleleLeft": `number`, -> "alleleRight": `number`, -> "zygosity": `number` (0 : "Unknown" | 1 : "Heterozygous" | 2 : "HomozygousReference" | 3 : "HomozygousAlternate") -> }, -> "genotypeProbability": `[]float` | null, -> "phredScaleLikelyhood": `[]float` | null, -> } -> }, -> "fileId": `string` (UUID), +> "filter": `string`, +> "sampleId": `string`, +> "genotype_type": `string ( "HETEROZYGOUS" | "HOMOZYGOUS_REFERENCE" | "HOMOZYGOUS_ALTERNATE" )`, > "assemblyId": `string` ("GRCh38" | "GRCh37" | "NCBI36" | "Other"), > }, > ... diff --git a/docker-compose.yaml b/docker-compose.yaml index 39e27936..e4fd2732 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -48,6 +48,7 @@ services: - GOHAN_API_VCF_PATH=${GOHAN_API_CONTAINERIZED_VCF_PATH} - GOHAN_API_GTF_PATH=${GOHAN_API_CONTAINERIZED_GTF_PATH} - GOHAN_API_API_DRS_BRIDGE_DIR=${GOHAN_API_API_DRS_BRIDGE_DIR_CONTAINERIZED} + - GOHAN_API_BULK_INDEXING_CAP=${GOHAN_API_BULK_INDEXING_CAP} - GOHAN_API_FILE_PROC_CONC_LVL=${GOHAN_API_FILE_PROC_CONC_LVL} - GOHAN_API_LINE_PROC_CONC_LVL=${GOHAN_API_LINE_PROC_CONC_LVL} diff --git a/src/api/models/constants/main.go b/src/api/models/constants/main.go index a86e3849..eedfecad 100644 --- a/src/api/models/constants/main.go +++ b/src/api/models/constants/main.go @@ -1,5 +1,7 @@ package constants +var VcfHeaders = []string{"chrom", "pos", "id", "ref", "alt", "qual", "filter", "info", "format"} + /* Defines a set of base level constants and enums to be used diff --git a/src/api/models/constants/zygosity/main.go b/src/api/models/constants/zygosity/main.go index fe174494..65d1e470 100644 --- a/src/api/models/constants/zygosity/main.go +++ b/src/api/models/constants/zygosity/main.go @@ -14,3 +14,18 @@ const ( func IsKnown(value int) bool { return value > int(Unknown) && value <= int(HomozygousAlternate) } + +func ZygosityToString(zyg constants.Zygosity) string { + switch zyg { + case Heterozygous: + return "HETEROZYGOUS" + case HomozygousAlternate: + return "HOMOZYGOUS_ALTERNATE" + case HomozygousReference: + return "HOMOZYGOUS_REFERENCE" + default: + return "UNKNOWN" + } +} + +// TODO: StringOrIntToZygosity diff --git a/src/api/models/dtos.go b/src/api/models/dtos.go deleted file mode 100644 index e57e417e..00000000 --- a/src/api/models/dtos.go +++ /dev/null @@ -1,42 +0,0 @@ -package models - -import "api/models/constants" - -type BentoV2CompatibleVariantsResponseDTO struct { - Results []BentoV2CompatibleVariantResponseCallsModel `json:"results"` -} -type BentoV2CompatibleVariantResponseCallsModel struct { - AssemblyId constants.AssemblyId `json:"assembly_id"` - Calls []BentoV2CompatibleVariantResponseDataModel `json:"calls"` - Chromosome string `json:"chromosome"` - Start int `json:"start"` - End int `json:"end"` -} -type BentoV2CompatibleVariantResponseDataModel struct { - SampleId string `json:"sample_id"` - GenotypeType string `json:"genotype_type"` -} - -// -- - -type VariantsResponseDTO struct { - Status int `json:"status"` - Message string `json:"message"` - Data []VariantResponseDataModel `json:"data"` -} -type VariantResponseDataModel struct { - VariantId string `json:"variantId"` - SampleId string `json:"sampleId"` - Count int `json:"count"` - Results interface{} `json:"results"` // i.e.: []Variant or []string -} - -// -- -- - -type GenesResponseDTO struct { - Status int `json:"status"` - Message string `json:"message"` - Term string `json:"term"` - Count int `json:"count"` - Results []Gene `json:"results"` // []Gene -} diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go new file mode 100644 index 00000000..1c540dc5 --- /dev/null +++ b/src/api/models/dtos/main.go @@ -0,0 +1,65 @@ +package dtos + +import ( + "api/models/constants" + "api/models/indexes" +) + +// ---- Variants +type VariantReponse struct { + Status int `json:"status,omitempty"` + Message string `json:"message,omitempty"` +} +type VariantGetReponse struct { + VariantReponse + Results []VariantGetResult `json:"results"` +} +type VariantCountReponse struct { + VariantReponse + Results []VariantCountResult `json:"results"` +} + +type VariantResult struct { + Query string `json:"query,omitempty"` + AssemblyId constants.AssemblyId `json:"assembly_id"` + Chromosome string `json:"chromosome"` + Start int `json:"start"` + End int `json:"end"` +} + +type VariantGetResult struct { + VariantResult + Calls []VariantCall `json:"calls"` +} +type VariantCountResult struct { + VariantResult + Count int `json:"count"` +} + +type VariantCall struct { + Chrom string `json:"chrom,omitempty"` + Pos int `json:"pos,omitempty"` + Id string `json:"id,omitempty"` + Ref []string `json:"ref,omitempty"` + Alt []string `json:"alt,omitempty"` + Format []string `json:"format,omitempty"` + Qual int `json:"qual,omitempty"` + Filter string `json:"filter,omitempty"` + + Info []indexes.Info `json:"info,omitempty"` // TODO; refactor? + + SampleId string `json:"sample_id"` + GenotypeType string `json:"genotype_type"` + // TODO: GenotypeProbability, PhredScaleLikelyhood ? + + AssemblyId constants.AssemblyId `json:"assemblyId,omitempty"` +} + +// -- Genes +type GenesResponseDTO struct { + Status int `json:"status"` + Message string `json:"message"` + Term string `json:"term"` + Count int `json:"count"` + Results []indexes.Gene `json:"results"` // []Gene +} diff --git a/src/api/models/elasticsearch.go b/src/api/models/indexes/main.go similarity index 91% rename from src/api/models/elasticsearch.go rename to src/api/models/indexes/main.go index 942af534..3a6780a5 100644 --- a/src/api/models/elasticsearch.go +++ b/src/api/models/indexes/main.go @@ -1,11 +1,9 @@ -package models +package indexes import ( c "api/models/constants" ) -var VcfHeaders = []string{"chrom", "pos", "id", "ref", "alt", "qual", "filter", "info", "format"} - type Variant struct { Chrom string `json:"chrom"` Pos int `json:"pos"` diff --git a/src/api/models/ingest/structs/main.go b/src/api/models/ingest/structs/main.go index d6010fd5..7edae19d 100644 --- a/src/api/models/ingest/structs/main.go +++ b/src/api/models/ingest/structs/main.go @@ -1,16 +1,16 @@ package structs import ( - "api/models" + "api/models/indexes" "sync" ) type IngestionQueueStructure struct { - Variant *models.Variant + Variant *indexes.Variant WaitGroup *sync.WaitGroup } type GeneIngestionQueueStructure struct { - Gene *models.Gene + Gene *indexes.Gene WaitGroup *sync.WaitGroup } diff --git a/src/api/models/config.go b/src/api/models/main.go similarity index 100% rename from src/api/models/config.go rename to src/api/models/main.go diff --git a/src/api/mvc/genes.go b/src/api/mvc/genes.go index ef7ff320..f3aa9e93 100644 --- a/src/api/mvc/genes.go +++ b/src/api/mvc/genes.go @@ -2,10 +2,10 @@ package mvc import ( "api/contexts" - "api/models" "api/models/constants" assemblyId "api/models/constants/assembly-id" "api/models/constants/chromosome" + "api/models/dtos" "api/models/ingest" "api/models/ingest/structs" esRepo "api/repositories/elasticsearch" @@ -22,6 +22,8 @@ import ( "sync" "time" + "api/models/indexes" + "github.com/labstack/echo" "github.com/mitchellh/mapstructure" ) @@ -268,7 +270,7 @@ func GenesIngest(c echo.Context) error { return } - discoveredGene := &models.Gene{ + discoveredGene := &indexes.Gene{ Name: geneName, Chrom: chromosomeClean, Start: start, @@ -370,13 +372,13 @@ func GenesGetByNomenclatureWildcard(c echo.Context) error { mapstructure.Decode(docsHits, &allDocHits) // grab _source for each hit - var allSources []models.Gene + var allSources []indexes.Gene for _, r := range allDocHits { source := r["_source"].(map[string]interface{}) // cast map[string]interface{} to struct - var resultingVariant models.Gene + var resultingVariant indexes.Gene mapstructure.Decode(source, &resultingVariant) // accumulate structs @@ -385,7 +387,7 @@ func GenesGetByNomenclatureWildcard(c echo.Context) error { fmt.Printf("Found %d docs!\n", len(allSources)) - geneResponseDTO := models.GenesResponseDTO{ + geneResponseDTO := dtos.GenesResponseDTO{ Term: term, Count: len(allSources), Results: allSources, diff --git a/src/api/mvc/variants.go b/src/api/mvc/variants.go index ba1b790d..cf8c8e40 100644 --- a/src/api/mvc/variants.go +++ b/src/api/mvc/variants.go @@ -16,20 +16,23 @@ import ( "time" "api/contexts" - "api/models" "api/models/constants" a "api/models/constants/assembly-id" gq "api/models/constants/genotype-query" s "api/models/constants/sort" + "api/models/dtos" + "api/models/indexes" "api/models/ingest" esRepo "api/repositories/elasticsearch" "api/utils" + "api/models/constants/zygosity" + "github.com/elastic/go-elasticsearch/v7" "github.com/google/uuid" + "github.com/mitchellh/mapstructure" "github.com/labstack/echo" - "github.com/mitchellh/mapstructure" ) func VariantsIngestionStats(c echo.Context) error { @@ -128,7 +131,6 @@ func VariantsIngest(c echo.Context) error { } startTime := time.Now() - fmt.Printf("Ingest Start: %s\n", startTime) // get vcf files @@ -240,7 +242,14 @@ func VariantsIngest(c echo.Context) error { // --- decompress vcf.gz fmt.Printf("Decompressing %s !\n", gzippedFileName) - gzippedFilePath := fmt.Sprintf("%s%s", vcfPath, gzippedFileName) + var separator string + if strings.HasPrefix(gzippedFileName, "/") { + separator = "" + } else { + separator = "/" + } + + gzippedFilePath := fmt.Sprintf("%s%s%s", vcfPath, separator, gzippedFileName) r, err := os.Open(gzippedFilePath) if err != nil { msg := fmt.Sprintf("error opening %s: %s\n", gzippedFileName, err) @@ -257,7 +266,7 @@ func VariantsIngest(c echo.Context) error { // such that DRS can load the file into memory to process rather than receiving // the file from an upload, thus utilizing it's already-exisiting /private/ingest endpoind // ----- - tmpDestinationFileName := fmt.Sprintf("%s%s", cfg.Api.BridgeDirectory, gzippedFileName) + tmpDestinationFileName := fmt.Sprintf("%s%s%s", cfg.Api.BridgeDirectory, separator, gzippedFileName) // prepare directory inside bridge directory partialTmpDir, _ := path.Split(gzippedFileName) @@ -403,19 +412,20 @@ func VariantsIngest(c echo.Context) error { defer r.Close() // --- load vcf into memory and ingest the vcf file into elasticsearch - fmt.Printf("Begin processing %s !\n", vcfFilePath) + beginProcessingTime := time.Now() + fmt.Printf("Begin processing %s at [%s]\n", vcfFilePath, beginProcessingTime) ingestionService.ProcessVcf(vcfFilePath, drsFileId, assemblyId, filterOutHomozygousReferences, cfg.Api.LineProcessingConcurrencyLevel) + fmt.Printf("Ingest duration for file at %s : %s\n", vcfFilePath, time.Since(beginProcessingTime)) // --- delete the temporary vcf file fmt.Printf("Removing temporary file %s !\n", vcfFilePath) os.Remove(vcfFilePath) + fmt.Printf("Removal done!") // --- delete full tmp path and all contents // (WARNING : Only do this when running over a single file) //os.RemoveAll(vcfTmpPath) - fmt.Printf("Ingest duration for file at %s : %s\n", vcfFilePath, time.Since(startTime)) - reqStat.State = ingest.Done ingestionService.IngestRequestChan <- reqStat }(_fileName, _newRequestState) @@ -558,15 +568,11 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error // --- // prepare response - var respDTO = make(map[string]interface{}) - respDTO["DataType"] = "variant" - + respDTO := dtos.VariantGetReponse{ + Results: make([]dtos.VariantGetResult, 0), + } respDTOMux := sync.RWMutex{} - // initialize length 0 to avoid nil response - tmpResults := make([]interface{}, 0) - tmpCalls := []models.BentoV2CompatibleVariantResponseCallsModel{} - var errors []error errorMux := sync.RWMutex{} @@ -578,17 +584,22 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error go func(_id string) { defer wg.Done() - var variantRespDataModel = make(map[string]interface{}) - // variantRespDataModel := models.VariantResponseDataModel{} + variantResult := dtos.VariantGetResult{ + Calls: make([]dtos.VariantCall, 0), + } var ( docs map[string]interface{} searchErr error ) if isVariantIdQuery { + fmt.Printf("Executing Get-Variants for VariantId %s\n", _id) + + // only set query string if + // 'getSampleIdsOnly' is false + // (current support for bentoV2 + bento_federation_service integration) if !getSampleIdsOnly { - variantRespDataModel["VariantId"] = _id - fmt.Printf("Executing Get-Variants for VariantId %s\n", _id) + variantResult.Query = fmt.Sprintf("variantId:%s", _id) // TODO: Refactor } docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, @@ -600,10 +611,15 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error getSampleIdsOnly) } else { // implied sampleId query - variantRespDataModel["SampleId"] = _id - fmt.Printf("Executing Get-Samples for SampleId %s\n", _id) + // only set query string if + // 'getSampleIdsOnly' is false + // (current support for bentoV2 + bento_federation_service integration) + if !getSampleIdsOnly { + variantResult.Query = fmt.Sprintf("variantId:%s", _id) // TODO: Refactor + } + docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, "", _id, // note : "" is for variantId @@ -619,21 +635,46 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error return } - // query for each id + // -- map variant index models to appropriate variant result + call dto models + variantResult.AssemblyId = assemblyId + variantResult.Chromosome = chromosome + variantResult.Start = lowerBound + variantResult.End = upperBound + + if getSampleIdsOnly { + // gather data from "aggregations" + docsBuckets := docs["aggregations"].(map[string]interface{})["sampleIds"].(map[string]interface{})["buckets"] + allDocBuckets := []map[string]interface{}{} + mapstructure.Decode(docsBuckets, &allDocBuckets) + + for _, r := range allDocBuckets { + sampleId := r["key"].(string) - if !getSampleIdsOnly { + // TEMP : re-capitalize sampleIds retrieved from elasticsearch at response time + // TODO: touch up elasticsearch ingestion/parsing settings + // to not automatically force all sampleIds to lowercase when indexing + sampleId = strings.ToUpper(sampleId) + + // accumulate sample Id's + variantResult.Calls = append(variantResult.Calls, dtos.VariantCall{ + SampleId: sampleId, + GenotypeType: string(genotype), + }) + } + } else { + // gather data from "hits" docsHits := docs["hits"].(map[string]interface{})["hits"] allDocHits := []map[string]interface{}{} mapstructure.Decode(docsHits, &allDocHits) // grab _source for each hit - var allSources []models.Variant + var allSources []indexes.Variant for _, r := range allDocHits { source := r["_source"].(map[string]interface{}) // cast map[string]interface{} to struct - var resultingVariant models.Variant + var resultingVariant indexes.Variant mapstructure.Decode(source, &resultingVariant) // accumulate structs @@ -642,37 +683,36 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error fmt.Printf("Found %d docs!\n", len(allSources)) - variantRespDataModel["Results"] = allSources - - respDTOMux.Lock() - tmpResults = append(tmpResults, variantRespDataModel) - respDTOMux.Unlock() - } else { - // TODO: refactor this 'else' statement - docsBuckets := docs["aggregations"].(map[string]interface{})["sampleIds"].(map[string]interface{})["buckets"] - allDocBuckets := []map[string]interface{}{} - mapstructure.Decode(docsBuckets, &allDocBuckets) - - for _, r := range allDocBuckets { - sampleId := r["key"].(string) - - // cast map[string]interface{} to struct - simplifiedResponse := models.BentoV2CompatibleVariantResponseDataModel{ - SampleId: strings.ToUpper(sampleId), - GenotypeType: string(genotype), - } - - call := models.BentoV2CompatibleVariantResponseCallsModel{} - call.AssemblyId = assemblyId - call.Chromosome = chromosome - call.Start = lowerBound - call.End = upperBound - call.Calls = append(call.Calls, simplifiedResponse) - - // accumulate sample Id's - tmpCalls = append(tmpCalls, call) + for _, source := range allSources { + // TEMP : re-capitalize sampleIds retrieved from elasticsearch at response time + // TODO: touch up elasticsearch ingestion/parsing settings + // to not automatically force all sampleIds to lowercase when indexing + sampleId := strings.ToUpper(source.Sample.Id) + + variantResult.Calls = append(variantResult.Calls, dtos.VariantCall{ + Chrom: source.Chrom, + Pos: source.Pos, + Id: source.Id, + Ref: source.Ref, + Alt: source.Alt, + Format: source.Format, + Qual: source.Qual, + Filter: source.Filter, + + Info: source.Info, + + SampleId: sampleId, + GenotypeType: zygosity.ZygosityToString(source.Sample.Variation.Genotype.Zygosity), + + AssemblyId: source.AssemblyId, + }) } } + // -- + + respDTOMux.Lock() + respDTO.Results = append(respDTO.Results, variantResult) + respDTOMux.Unlock() }(id) } @@ -680,33 +720,19 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error wg.Wait() if len(errors) == 0 { - respDTO["Status"] = 200 - respDTO["Message"] = "Success" - } else { - respDTO["Status"] = 500 - respDTO["Message"] = "Something went wrong.. Please contact the administrator!" - } - - // cast generic map[string]interface{} to type - // depending on `getSampleIdsOnly` - if getSampleIdsOnly { - respDTO["Results"] = tmpCalls + // only set status and message if + // 'getSampleIdsOnly' is false + // (current support for bentoV2 + bento_federation_service integration) + if !getSampleIdsOnly { + respDTO.Status = 200 + respDTO.Message = "Success" + } } else { - respDTO["Data"] = tmpResults + respDTO.Status = 500 + respDTO.Message = "Something went wrong.. Please contact the administrator!" } - // TODO: Refactor - if getSampleIdsOnly { - var dto models.BentoV2CompatibleVariantsResponseDTO - mapstructure.Decode(respDTO, &dto) - - return c.JSON(http.StatusOK, dto) - } else { - var dto models.VariantsResponseDTO - mapstructure.Decode(respDTO, &dto) - - return c.JSON(http.StatusOK, dto) - } + return c.JSON(http.StatusOK, respDTO) } func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) error { @@ -714,7 +740,9 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro var es, chromosome, lowerBound, upperBound, reference, alternative, genotype, assemblyId = retrieveCommonElements(c) - respDTO := models.VariantsResponseDTO{} + respDTO := dtos.VariantCountReponse{ + Results: make([]dtos.VariantCountResult, 0), + } respDTOMux := sync.RWMutex{} var errors []error @@ -727,16 +755,15 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro go func(_id string) { defer wg.Done() - variantRespDataModel := models.VariantResponseDataModel{} + countResult := dtos.VariantCountResult{} var ( docs map[string]interface{} countError error ) if isVariantIdQuery { - variantRespDataModel.VariantId = _id - fmt.Printf("Executing Count-Variants for VariantId %s\n", _id) + countResult.Query = fmt.Sprintf("variantId:%s", _id) // TODO: Refactor docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, @@ -744,9 +771,8 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro reference, alternative, genotype, assemblyId) } else { // implied sampleId query - variantRespDataModel.SampleId = _id - fmt.Printf("Executing Count-Samples for SampleId %s\n", _id) + countResult.Query = fmt.Sprintf("sampleId:%s", _id) // TODO: Refactor docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, @@ -760,11 +786,15 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro errorMux.Unlock() return } + countResult.AssemblyId = assemblyId + countResult.Chromosome = chromosome + countResult.Start = lowerBound + countResult.End = upperBound - variantRespDataModel.Count = int(docs["count"].(float64)) + countResult.Count = int(docs["count"].(float64)) respDTOMux.Lock() - respDTO.Data = append(respDTO.Data, variantRespDataModel) + respDTO.Results = append(respDTO.Results, countResult) respDTOMux.Unlock() }(id) diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index c1e9baff..b2a6ca10 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -25,8 +25,11 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" + "api/models/indexes" + "github.com/Jeffail/gabs" "github.com/elastic/go-elasticsearch/v7" "github.com/elastic/go-elasticsearch/v7/esutil" @@ -75,7 +78,7 @@ func NewIngestionService(es *elasticsearch.Client, cfg *models.Config) *Ingestio Client: iz.ElasticsearchClient, NumWorkers: numWorkers, // FlushBytes: int(flushBytes), // The flush threshold in bytes (default: 5MB ?) - FlushInterval: time.Second, // The periodic flush interval + // FlushInterval: time.Second, // The periodic flush interval }) iz.IngestionBulkIndexer = bi @@ -83,8 +86,8 @@ func NewIngestionService(es *elasticsearch.Client, cfg *models.Config) *Ingestio Index: "genes", Client: iz.ElasticsearchClient, NumWorkers: numWorkers, - //FlushBytes: int(64), // The flush threshold in bytes (default: 5MB ?) - FlushInterval: 3 * time.Second, // The periodic flush interval + // FlushBytes: int(64), // The flush threshold in bytes (default: 5MB ?) + // FlushInterval: 3 * time.Second, // The periodic flush interval }) iz.GeneIngestionBulkIndexer = gbi @@ -96,7 +99,7 @@ func NewIngestionService(es *elasticsearch.Client, cfg *models.Config) *Ingestio func (i *IngestionService) Init() { // safeguard to prevent multiple initilizations if !i.Initialized { - // spin up a listener for ingest request updates + // spin up a listener for both variant and gene ingest request updates go func() { for { select { @@ -125,7 +128,7 @@ func (i *IngestionService) Init() { } }() - // spin up a listener for each bulk indexing + // spin up a listener for both variant and gene bulk indexing go func() { for { select { @@ -333,6 +336,9 @@ func (i *IngestionService) ProcessVcf( scanner := bufio.NewScanner(f) var discoveredHeaders bool = false var headers []string + headerSampleIds := make(map[int]string) + + skippedHomozygousReferencesCount := int32(0) var _fileWG sync.WaitGroup @@ -350,6 +356,16 @@ func (i *IngestionService) ProcessVcf( if strings.Contains(line, "CHROM") { // Split the string by tabs headers = strings.Split(line, "\t") + + for id, header := range headers { + // determine if header is a default VCF header. + // if it is not, assume it's a sampleId and keep + // track of it with an id + if !utils.StringInSlice(strings.ToLower(strings.TrimSpace(strings.ReplaceAll(header, "#", ""))), constants.VcfHeaders) { + headerSampleIds[len(constants.VcfHeaders)-id] = header + } + } + discoveredHeaders = true fmt.Println("Found the headers: ", headers) @@ -392,7 +408,7 @@ func (i *IngestionService) ProcessVcf( value := strings.TrimSpace(rc) // if not a vcf header, assume it's a sampleId header - if utils.StringInSlice(key, models.VcfHeaders) { + if utils.StringInSlice(key, constants.VcfHeaders) { // filter field type by column name if key == "chrom" { @@ -405,7 +421,7 @@ func (i *IngestionService) ProcessVcf( tmpVariant[key] = value tmpVariantMapMutex.Unlock() } else { - // TODO: skip this call + // skip this call skipThisCall = true // redundant? @@ -447,7 +463,7 @@ func (i *IngestionService) ProcessVcf( tmpVariant[key] = value tmpVariantMapMutex.Unlock() } else if key == "info" { - var allInfos []*models.Info + var allInfos []*indexes.Info // Split all alleles by semi-colon semiColonSeparations := strings.Split(value, ";") @@ -457,12 +473,12 @@ func (i *IngestionService) ProcessVcf( equalitySeparations := strings.Split(scSep, "=") if len(equalitySeparations) == 2 { - allInfos = append(allInfos, &models.Info{ + allInfos = append(allInfos, &indexes.Info{ Id: equalitySeparations[0], Value: equalitySeparations[1], }) } else { // len(equalitySeparations) == 1 - allInfos = append(allInfos, &models.Info{ + allInfos = append(allInfos, &indexes.Info{ Id: "", Value: equalitySeparations[0], }) @@ -479,13 +495,27 @@ func (i *IngestionService) ProcessVcf( tmpVariantMapMutex.Unlock() } } else { // assume its a sampleId header - tmpSamplesMutex.Lock() + allValues := strings.Split(value, ":") + + // ---- filter out homozygous reference calls + // support for multi-sampled calls + // assume first component of allValues is the genotype + genoTypeValue := allValues[0] + if filterOutHomozygousReferences && (genoTypeValue == "0|0" || genoTypeValue == "0/0") { + // skip adding this sample to the 'tmpSamples' list which + // then goes to be further processed into a variant document + // increase count of skipped calls + atomic.AddInt32(&skippedHomozygousReferencesCount, 1) + + return + } + + tmpSamplesMutex.Lock() tmpSamples = append(tmpSamples, map[string]interface{}{ "key": key, - "values": strings.Split(value, ":"), + "values": allValues, }) - tmpSamplesMutex.Unlock() } }(rowIndex, rowComponent, &rowWg) @@ -500,7 +530,7 @@ func (i *IngestionService) ProcessVcf( } // --- TODO: prep formats + samples - var samples []*models.Sample + var samples []*indexes.Sample // ---- get genotype stuff var ( @@ -530,8 +560,8 @@ func (i *IngestionService) ProcessVcf( } for _, ts := range tmpSamples { - sample := &models.Sample{} - variation := &models.Variation{} + sample := &indexes.Sample{} + variation := &indexes.Variation{} tmpKeyString := ts["key"].(string) tmpValueStrings := ts["values"].([]string) @@ -582,7 +612,7 @@ func (i *IngestionService) ProcessVcf( } } - variation.Genotype = models.Genotype{ + variation.Genotype = indexes.Genotype{ Phased: phased, Zygosity: zyg, } @@ -613,14 +643,6 @@ func (i *IngestionService) ProcessVcf( sample.Id = tmpKeyString sample.Variation = *variation - // ---- filter out homozygous reference calls - // TODO: determine if this is the most optimal place - // to perform this verification - if filterOutHomozygousReferences && - sample.Variation.Genotype.Zygosity == z.HomozygousReference { - continue - } - samples = append(samples, sample) } @@ -629,12 +651,17 @@ func (i *IngestionService) ProcessVcf( // references, and thus maybe all samples from the call // [i.e. if this is a single-sample VCF]) if len(samples) > 0 { + + // for multi-sample vcfs, add 1 to the waitgroup for + // each sample (minus 1 given the initial addition) + fileWg.Add(len(samples) - 1) + // Create a whole variant document for each sample found on this VCF line // TODO: revisit this model as it is surely not storage efficient for _, sample := range samples { tmpVariant["sample"] = sample // --- push to a bulk "queue" - var resultingVariant models.Variant + var resultingVariant indexes.Variant mapstructure.Decode(tmpVariant, &resultingVariant) // pass variant (along with a waitgroup) to the channel @@ -658,7 +685,8 @@ func (i *IngestionService) ProcessVcf( // let all lines be queued up and processed _fileWG.Wait() - fmt.Printf("File %s waited for and complete!\n", vcfFilePath) + + fmt.Printf("File %s waited for and complete!\n\t- Number of skipped Homozygous Reference calls: %d\n", vcfFilePath, skippedHomozygousReferencesCount) } func (i *IngestionService) FilenameAlreadyRunning(filename string) bool { diff --git a/src/tests/integration/api/api_capacity_test.go b/src/tests/integration/api/api_capacity_test.go index affc129c..87680dc0 100644 --- a/src/tests/integration/api/api_capacity_test.go +++ b/src/tests/integration/api/api_capacity_test.go @@ -1,84 +1,84 @@ package api -import ( - "api/models" - "encoding/json" - "fmt" - "io/ioutil" - "net/http" - "regexp" - "sync" - "testing" - common "tests/common" - - "github.com/stretchr/testify/assert" -) - -func TestCanHandleIngestAllLocalVcfs(_t *testing.T) { - cfg := common.InitConfig() - - var vcfGzfiles []string - - fileInfo, err := ioutil.ReadDir(cfg.Api.LocalVcfPath) - if err != nil { - fmt.Printf("Failed: %s\n", err) - } - - // Filter only .vcf.gz files - for _, file := range fileInfo { - if matched, _ := regexp.MatchString(".vcf.gz", file.Name()); matched { - vcfGzfiles = append(vcfGzfiles, file.Name()) - } else { - fmt.Printf("Skipping %s\n", file.Name()) - } - } - - var combWg sync.WaitGroup - simultaneousRequestsCapacity := 1 // TODO: tweak - simultaneousRequestsQueue := make(chan bool, simultaneousRequestsCapacity) - - // fire and forget - for _, vcfgz := range vcfGzfiles { - simultaneousRequestsQueue <- true - combWg.Add(1) - go func(_vcfgz string, _combWg *sync.WaitGroup) { - defer _combWg.Done() - defer func() { <-simultaneousRequestsQueue }() - - url := cfg.Api.Url + fmt.Sprintf("/variants/ingestion/run?fileNames=%s&assemblyId=GRCH37&filterOutHomozygousReferences=true", _vcfgz) - - fmt.Printf("Calling %s\n", url) - request, _ := http.NewRequest("GET", url, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(_t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe)) - - // -- interpret array of ingestion requests from response - respBody, respBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(_t, respBodyErr) - - // --- transform body bytes to string - respBodyString := string(respBody) - - // -- convert to json and check for error - var respDto models.VariantsResponseDTO - jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto) - assert.Nil(_t, jsonUnmarshallingError) - - }(vcfgz, &combWg) - } - - // allowing all lines to be queued up and waited for - for i := 0; i < simultaneousRequestsCapacity; i++ { - simultaneousRequestsQueue <- true - } - - combWg.Wait() -} +// import ( +// "api/models/dtos" +// "encoding/json" +// "fmt" +// "io/ioutil" +// "net/http" +// "regexp" +// "sync" +// "testing" +// common "tests/common" + +// "github.com/stretchr/testify/assert" +// ) + +// func TestCanHandleIngestAllLocalVcfs(_t *testing.T) { +// cfg := common.InitConfig() + +// var vcfGzfiles []string + +// fileInfo, err := ioutil.ReadDir(cfg.Api.LocalVcfPath) +// if err != nil { +// fmt.Printf("Failed: %s\n", err) +// } + +// // Filter only .vcf.gz files +// for _, file := range fileInfo { +// if matched, _ := regexp.MatchString(".vcf.gz", file.Name()); matched { +// vcfGzfiles = append(vcfGzfiles, file.Name()) +// } else { +// fmt.Printf("Skipping %s\n", file.Name()) +// } +// } + +// var combWg sync.WaitGroup +// simultaneousRequestsCapacity := 1 // TODO: tweak +// simultaneousRequestsQueue := make(chan bool, simultaneousRequestsCapacity) + +// // fire and forget +// for _, vcfgz := range vcfGzfiles { +// simultaneousRequestsQueue <- true +// combWg.Add(1) +// go func(_vcfgz string, _combWg *sync.WaitGroup) { +// defer _combWg.Done() +// defer func() { <-simultaneousRequestsQueue }() + +// url := cfg.Api.Url + fmt.Sprintf("/variants/ingestion/run?fileNames=%s&assemblyId=GRCH37&filterOutHomozygousReferences=true", _vcfgz) + +// fmt.Printf("Calling %s\n", url) +// request, _ := http.NewRequest("GET", url, nil) + +// client := &http.Client{} +// response, responseErr := client.Do(request) +// assert.Nil(_t, responseErr) + +// defer response.Body.Close() + +// // this test (at the time of writing) will only work if authorization is disabled +// shouldBe := 200 +// assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe)) + +// // -- interpret array of ingestion requests from response +// respBody, respBodyErr := ioutil.ReadAll(response.Body) +// assert.Nil(_t, respBodyErr) + +// // --- transform body bytes to string +// respBodyString := string(respBody) + +// // -- convert to json and check for error +// var respDto dtos.VariantReponse +// jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto) +// assert.Nil(_t, jsonUnmarshallingError) + +// }(vcfgz, &combWg) +// } + +// // allowing all lines to be queued up and waited for +// for i := 0; i < simultaneousRequestsCapacity; i++ { +// simultaneousRequestsQueue <- true +// } + +// combWg.Wait() +// } diff --git a/src/tests/integration/api/api_gene_test.go b/src/tests/integration/api/api_gene_test.go index 103f3bc2..84707c84 100644 --- a/src/tests/integration/api/api_gene_test.go +++ b/src/tests/integration/api/api_gene_test.go @@ -5,6 +5,8 @@ import ( c "api/models/constants" a "api/models/constants/assembly-id" "api/models/constants/chromosome" + "api/models/dtos" + "api/models/indexes" "encoding/json" "fmt" "io/ioutil" @@ -37,12 +39,12 @@ func TestCanGetGenesByAssemblyIdAndChromosome(t *testing.T) { // assert the dto response slice is plentiful assert.NotNil(t, allDtoResponses) - From(allDtoResponses).ForEachT(func(dto models.GenesResponseDTO) { + From(allDtoResponses).ForEachT(func(dto dtos.GenesResponseDTO) { // ensure there are results in the response assert.NotNil(t, dto.Results) // check the resulting data - From(dto.Results).ForEachT(func(gene models.Gene) { + From(dto.Results).ForEachT(func(gene indexes.Gene) { // ensure the gene is legit assert.NotNil(t, gene.Name) assert.NotNil(t, gene.AssemblyId) @@ -52,7 +54,7 @@ func TestCanGetGenesByAssemblyIdAndChromosome(t *testing.T) { }) } -func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []models.GenesResponseDTO { +func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []dtos.GenesResponseDTO { cfg := common.InitConfig() // retrieve the overview @@ -64,7 +66,7 @@ func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []model // initialize a common slice in which to // accumulate al responses asynchronously - allDtoResponses := []models.GenesResponseDTO{} + allDtoResponses := []dtos.GenesResponseDTO{} allDtoResponsesMux := sync.RWMutex{} var combWg sync.WaitGroup @@ -143,7 +145,7 @@ func getGenesOverview(_t *testing.T, _cfg *models.Config) map[string]interface{} return overviewRespJson } -func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c.AssemblyId, _t *testing.T, _cfg *models.Config) models.GenesResponseDTO { +func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c.AssemblyId, _t *testing.T, _cfg *models.Config) dtos.GenesResponseDTO { queryString := fmt.Sprintf("?chromosome=%s&assemblyId=%s", chromosome, assemblyId) @@ -152,7 +154,7 @@ func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c. return getGetGenesCall(url, _t) } -func getGetGenesCall(url string, _t *testing.T) models.GenesResponseDTO { +func getGetGenesCall(url string, _t *testing.T) dtos.GenesResponseDTO { fmt.Printf("Calling %s\n", url) request, _ := http.NewRequest("GET", url, nil) @@ -174,7 +176,7 @@ func getGetGenesCall(url string, _t *testing.T) models.GenesResponseDTO { respBodyString := string(respBody) // -- convert to json and check for error - var respDto models.GenesResponseDTO + var respDto dtos.GenesResponseDTO jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto) assert.Nil(_t, jsonUnmarshallingError) diff --git a/src/tests/integration/api/api_variant_test.go b/src/tests/integration/api/api_variant_test.go index 935b98d0..e4044a79 100644 --- a/src/tests/integration/api/api_variant_test.go +++ b/src/tests/integration/api/api_variant_test.go @@ -7,6 +7,8 @@ import ( gq "api/models/constants/genotype-query" s "api/models/constants/sort" z "api/models/constants/zygosity" + "api/models/dtos" + "api/models/indexes" "encoding/json" "fmt" "io/ioutil" @@ -96,8 +98,8 @@ func TestCanGetVariantsWithoutInfoInResultset(t *testing.T) { // assert that all responses from all combinations have no results for _, dtoResponse := range allDtoResponses { - firstDataPointResults := dtoResponse.Data[0].Results - assert.Nil(t, firstDataPointResults[0].Info) + firstDataPointCalls := dtoResponse.Results[0].Calls + assert.Nil(t, firstDataPointCalls[0].Info) } } @@ -109,17 +111,17 @@ func TestCanGetVariantsWithInfoInResultset(t *testing.T) { // - * accumulate all infos into a single list using the set of // SelectManyT's and the SelectT // - ** iterate over each info in the ForEachT - var accumulatedInfos []*models.Info + var accumulatedInfos []*indexes.Info - From(allDtoResponses).SelectManyT(func(resp models.VariantsResponseDTO) Query { // * - return From(resp.Data) - }).SelectManyT(func(data models.VariantResponseDataModel) Query { - return From(data.Results) - }).SelectManyT(func(variant models.Variant) Query { + From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // * + return From(resp.Results) + }).SelectManyT(func(data dtos.VariantGetResult) Query { + return From(data.Calls) + }).SelectManyT(func(variant dtos.VariantCall) Query { return From(variant.Info) - }).SelectT(func(info models.Info) models.Info { + }).SelectT(func(info indexes.Info) indexes.Info { return info - }).ForEachT(func(info models.Info) { // ** + }).ForEachT(func(info indexes.Info) { // ** accumulatedInfos = append(accumulatedInfos, &info) }) @@ -140,17 +142,17 @@ func TestCanGetVariantsInAscendingPositionOrder(t *testing.T) { // assert the dto response slice is plentiful assert.NotNil(t, allDtoResponses) - From(allDtoResponses).ForEachT(func(dto models.VariantsResponseDTO) { + From(allDtoResponses).ForEachT(func(dto dtos.VariantGetReponse) { // ensure there is data - assert.NotNil(t, dto.Data) + assert.NotNil(t, dto.Results) // check the data - From(dto.Data).ForEachT(func(d models.VariantResponseDataModel) { + From(dto.Results).ForEachT(func(d dtos.VariantGetResult) { // ensure the variants slice is plentiful - assert.NotNil(t, d.Results) + assert.NotNil(t, d.Calls) latestSmallest := 0 - From(d.Results).ForEachT(func(dd models.Variant) { + From(d.Calls).ForEachT(func(dd dtos.VariantCall) { // verify order if latestSmallest != 0 { assert.True(t, latestSmallest <= dd.Pos) @@ -169,17 +171,17 @@ func TestCanGetVariantsInDescendingPositionOrder(t *testing.T) { // assert the dto response slice is plentiful assert.NotNil(t, allDtoResponses) - From(allDtoResponses).ForEachT(func(dto models.VariantsResponseDTO) { + From(allDtoResponses).ForEachT(func(dto dtos.VariantGetReponse) { // ensure there is data - assert.NotNil(t, dto.Data) + assert.NotNil(t, dto.Results) // check the data - From(dto.Data).ForEachT(func(d models.VariantResponseDataModel) { + From(dto.Results).ForEachT(func(d dtos.VariantGetResult) { // ensure the variants slice is plentiful - assert.NotNil(t, d.Results) + assert.NotNil(t, d.Calls) latestGreatest := 0 - From(d.Results).ForEachT(func(dd models.Variant) { + From(d.Calls).ForEachT(func(dd dtos.VariantCall) { if latestGreatest != 0 { assert.True(t, latestGreatest >= dd.Pos) } @@ -207,14 +209,14 @@ func TestCanGetHomozygousAlternateSamples(t *testing.T) { func TestCanGetHomozygousAlternateVariantsWithVariousReferences(t *testing.T) { // setup - specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) { + specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { // ensure test is formatted correctly assert.True(__t, alternativeAllelePattern == "") // validate variant - assert.Contains(__t, variant.Ref, referenceAllelePattern) + assert.Contains(__t, call.Ref, referenceAllelePattern) - validateHomozygousAlternateSample(__t, &variant.Sample) + validateHomozygousAlternateSample(__t, call) } executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Reference, specificValidation) @@ -222,14 +224,14 @@ func TestCanGetHomozygousAlternateVariantsWithVariousReferences(t *testing.T) { func TestCanGetHomozygousReferenceVariantsWithVariousReferences(t *testing.T) { // setup - specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) { + specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { // ensure test is formatted correctly assert.True(__t, alternativeAllelePattern == "") // validate variant - assert.Contains(__t, variant.Ref, referenceAllelePattern) + assert.Contains(__t, call.Ref, referenceAllelePattern) - validateHomozygousReferenceSample(__t, &variant.Sample) + validateHomozygousReferenceSample(__t, call) } executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Reference, specificValidation) @@ -237,14 +239,14 @@ func TestCanGetHomozygousReferenceVariantsWithVariousReferences(t *testing.T) { func TestCanGetHeterozygousVariantsWithVariousReferences(t *testing.T) { // setup - specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) { + specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { // ensure test is formatted correctly assert.True(__t, alternativeAllelePattern == "") // validate variant - assert.Contains(__t, variant.Ref, referenceAllelePattern) + assert.Contains(__t, call.Ref, referenceAllelePattern) - validateHeterozygousSample(__t, &variant.Sample) + validateHeterozygousSample(__t, call) } // trigger @@ -253,14 +255,14 @@ func TestCanGetHeterozygousVariantsWithVariousReferences(t *testing.T) { func TestCanGetHomozygousAlternateVariantsWithVariousAlternatives(t *testing.T) { // setup - specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) { + specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { // ensure test is formatted correctly assert.True(__t, referenceAllelePattern == "") // validate variant - assert.Contains(__t, variant.Alt, alternativeAllelePattern) + assert.Contains(__t, call.Alt, alternativeAllelePattern) - validateHomozygousAlternateSample(__t, &variant.Sample) + validateHomozygousAlternateSample(__t, call) } // trigger @@ -269,14 +271,14 @@ func TestCanGetHomozygousAlternateVariantsWithVariousAlternatives(t *testing.T) func TestCanGetHomozygousReferenceVariantsWithVariousAlternatives(t *testing.T) { // setup - specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) { + specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { // ensure test is formatted correctly assert.True(__t, referenceAllelePattern == "") // validate variant - assert.Contains(__t, variant.Alt, alternativeAllelePattern) + assert.Contains(__t, call.Alt, alternativeAllelePattern) - validateHomozygousReferenceSample(__t, &variant.Sample) + validateHomozygousReferenceSample(__t, call) } // trigger @@ -285,14 +287,14 @@ func TestCanGetHomozygousReferenceVariantsWithVariousAlternatives(t *testing.T) func TestCanGetHeterozygousVariantsWithVariousAlternatives(t *testing.T) { // setup - specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) { + specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { // ensure test is formatted correctly assert.True(__t, referenceAllelePattern == "") // validate variant - assert.Contains(__t, variant.Alt, alternativeAllelePattern) + assert.Contains(__t, call.Alt, alternativeAllelePattern) - validateHeterozygousSample(__t, &variant.Sample) + validateHeterozygousSample(__t, call) } // trigger @@ -302,7 +304,7 @@ func TestCanGetHeterozygousVariantsWithVariousAlternatives(t *testing.T) { // -- Common utility functions for api tests func executeReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, genotypeQuery c.GenotypeQuery, refAltTestType testConsts.ReferenceAlternativeTestType, - specificValidation func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string)) { + specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) { // TODO: use some kind of Allele Enum patterns := []string{"A", "C", "T", "G"} @@ -329,70 +331,66 @@ func executeReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, func runAndValidateReferenceOrAlternativeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string, - specificValidation func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string)) { + specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) { allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, referenceAllelePattern, alternativeAllelePattern) // assert that all of the responses include sample sets with the appropriate zygosity // - * accumulate all variants into a single list using the set of SelectManyT's and the SelectT // - ** iterate over each variant in the ForEachT - var accumulatedVariants []*models.Variant - - From(allDtoResponses).SelectManyT(func(resp models.VariantsResponseDTO) Query { // * - return From(resp.Data) - }).SelectManyT(func(data models.VariantResponseDataModel) Query { - return From(data.Results) - }).SelectT(func(variant models.Variant) models.Variant { - return variant - }).ForEachT(func(variant models.Variant) { // ** - accumulatedVariants = append(accumulatedVariants, &variant) + // var accumulatedVariants []*indexes.Variant + var accumulatedCalls []*dtos.VariantCall + + From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // * + return From(resp.Results) + }).SelectManyT(func(data dtos.VariantGetResult) Query { + return From(data.Calls) + }).ForEachT(func(call dtos.VariantCall) { // ** + accumulatedCalls = append(accumulatedCalls, &call) }) - if len(accumulatedVariants) == 0 { + if len(accumulatedCalls) == 0 { _t.Skip(fmt.Sprintf("No variants returned for patterns ref: '%s', alt: '%s'! Skipping --", referenceAllelePattern, alternativeAllelePattern)) } - for _, v := range accumulatedVariants { + for _, v := range accumulatedCalls { assert.NotNil(_t, v.Id) specificValidation(_t, v, referenceAllelePattern, alternativeAllelePattern) } } -func runAndValidateGenotypeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQuery, specificValidation func(__t *testing.T, sample *models.Sample)) { +func runAndValidateGenotypeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQuery, specificValidation func(__t *testing.T, call *dtos.VariantCall)) { allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, "", "") // assert that all of the responses include heterozygous sample sets // - * accumulate all samples into a single list using the set of SelectManyT's and the SelectT // - ** iterate over each sample in the ForEachT - var accumulatedSamples []*models.Sample - - From(allDtoResponses).SelectManyT(func(resp models.VariantsResponseDTO) Query { // * - return From(resp.Data) - }).SelectManyT(func(data models.VariantResponseDataModel) Query { - return From(data.Results) - }).SelectT(func(variant models.Variant) models.Sample { - return variant.Sample - }).ForEachT(func(sample models.Sample) { // ** - accumulatedSamples = append(accumulatedSamples, &sample) + // var accumulatedSamples []*indexes.Sample + var accumulatedCalls []*dtos.VariantCall + + From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // * + return From(resp.Results) + }).SelectManyT(func(data dtos.VariantGetResult) Query { + return From(data.Calls) + }).ForEachT(func(call dtos.VariantCall) { // ** + accumulatedCalls = append(accumulatedCalls, &call) }) - if len(accumulatedSamples) == 0 { + if len(accumulatedCalls) == 0 { _t.Skip("No samples returned! Skipping --") } - for _, s := range accumulatedSamples { - assert.NotEmpty(_t, s.Id) - assert.NotEmpty(_t, s.Variation) - assert.NotEmpty(_t, s.Variation.Genotype) - assert.NotEmpty(_t, s.Variation.Genotype.Zygosity) + for _, c := range accumulatedCalls { + assert.NotEmpty(_t, c.SampleId) + assert.NotEmpty(_t, c.GenotypeType) - specificValidation(_t, s) + specificValidation(_t, c) } } -func buildQueryAndMakeGetVariantsCall(chromosome string, sampleId string, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, referenceAllelePattern string, alternativeAllelePattern string, _t *testing.T, _cfg *models.Config) models.VariantsResponseDTO { +func buildQueryAndMakeGetVariantsCall(chromosome string, sampleId string, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, referenceAllelePattern string, alternativeAllelePattern string, _t *testing.T, _cfg *models.Config) dtos.VariantGetReponse { queryString := fmt.Sprintf("?chromosome=%s&ids=%s&includeInfoInResultSet=%t&sortByPosition=%s&assemblyId=%s", chromosome, sampleId, includeInfo, sortByPosition, assemblyId) @@ -467,7 +465,7 @@ func getOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct return allCombinations } -func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []models.VariantsResponseDTO { +func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []dtos.VariantGetReponse { cfg := common.InitConfig() // retrieve the overview @@ -483,7 +481,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc // initialize a common slice in which to // accumulate al responses asynchronously - allDtoResponses := []models.VariantsResponseDTO{} + allDtoResponses := []dtos.VariantGetReponse{} allDtoResponsesMux := sync.RWMutex{} var combWg sync.WaitGroup @@ -499,7 +497,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc // make the call dto := buildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, _t, cfg) - assert.Equal(_t, 1, len(dto.Data)) + assert.Equal(_t, 1, len(dto.Results)) // accumulate all response objects // to a common slice in an @@ -515,7 +513,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc return allDtoResponses } -func makeGetVariantsCall(url string, _t *testing.T) models.VariantsResponseDTO { +func makeGetVariantsCall(url string, _t *testing.T) dtos.VariantGetReponse { fmt.Printf("Calling %s\n", url) request, _ := http.NewRequest("GET", url, nil) @@ -537,7 +535,7 @@ func makeGetVariantsCall(url string, _t *testing.T) models.VariantsResponseDTO { respBodyString := string(respBody) // -- convert to json and check for error - var respDto models.VariantsResponseDTO + var respDto dtos.VariantGetReponse jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto) assert.Nil(_t, jsonUnmarshallingError) @@ -545,16 +543,19 @@ func makeGetVariantsCall(url string, _t *testing.T) models.VariantsResponseDTO { } // --- sample validation -func validateHeterozygousSample(__t *testing.T, sample *models.Sample) { - assert.True(__t, sample.Variation.Genotype.Zygosity == z.Heterozygous) +func validateHeterozygousSample(__t *testing.T, call *dtos.VariantCall) { + // assert.True(__t, sample.Variation.Genotype.Zygosity == z.Heterozygous) + assert.True(__t, call.GenotypeType == z.ZygosityToString(z.Heterozygous)) } -func validateHomozygousReferenceSample(__t *testing.T, sample *models.Sample) { - assert.True(__t, sample.Variation.Genotype.Zygosity == z.HomozygousReference) +func validateHomozygousReferenceSample(__t *testing.T, call *dtos.VariantCall) { + // assert.True(__t, sample.Variation.Genotype.Zygosity == z.HomozygousReference) + assert.True(__t, call.GenotypeType == z.ZygosityToString(z.HomozygousReference)) } -func validateHomozygousAlternateSample(__t *testing.T, sample *models.Sample) { - assert.True(__t, sample.Variation.Genotype.Zygosity == z.HomozygousAlternate) +func validateHomozygousAlternateSample(__t *testing.T, call *dtos.VariantCall) { + // assert.True(__t, sample.Variation.Genotype.Zygosity == z.HomozygousAlternate) + assert.True(__t, call.GenotypeType == z.ZygosityToString(z.HomozygousAlternate)) } // --