diff --git a/README.md b/README.md
index 76e5c99d..17aca221 100644
--- a/README.md
+++ b/README.md
@@ -204,7 +204,7 @@ Response
Requests
> **GET** `/variants/get/by/variantId`
> params:
-> - chromosome : **string** `( 1-23, X, Y, M )`
+> - chromosome : **string** `( 1-23, X, Y, MT )`
> - lowerBound : **number**
> - upperBound : **number**
> - reference : **string** `an allele ( "A" | "C" | "G" | "T" or some combination thereof )`
@@ -214,10 +214,11 @@ Requests
> - sortByPosition : **string** `( | asc | desc)`
> - includeInfoInResultSet : **boolean** `(true | false)`
> - genotype : **string** `( "HETEROZYGOUS" | "HOMOZYGOUS_REFERENCE" | "HOMOZYGOUS_ALTERNATE" )`
+> - getSampleIdsOnly : **bool** *`(optional) - default: false `*
>
> **GET** `/variants/count/by/variantId`
> params:
-> - chromosome : **string** `( 1-23, X, Y, M )`
+> - chromosome : **string** `( 1-23, X, Y, MT )`
> - lowerBound : **number**
> - upperBound : **number**
> - reference : **string** `an allele`
@@ -227,7 +228,7 @@ Requests
> **GET** `/variants/get/by/sampleId`
> params:
-> - chromosome : **string** `( 1-23, X, Y, M )`
+> - chromosome : **string** `( 1-23, X, Y, MT )`
> - lowerBound : **number**
> - upperBound : **number**
> - reference : **string** `an allele`
@@ -240,7 +241,7 @@ Requests
>
> **GET** `/variants/count/by/sampleId`
> params:
-> - chromosome : **string** `( 1-23, X, Y, M )`
+> - chromosome : **string** `( 1-23, X, Y, MT )`
> - lowerBound : **number**
> - upperBound : **number**
> - reference : **string** `an allele`
@@ -257,14 +258,18 @@ Generalized Response Body Structure
>{
> "status": `number` (200 - 500),
> "message": `string` ("Success" | "Error"),
-> "data": [
+> "results": [
> {
-> "variantId": `string`,
-> "sampleId": `string`,
-> "count": `number`,
-> "results": [
+> "query": `string`, // reflective of the type of id queried for, i.e 'variantId:abc123', or 'sampleId:HG0001
+> "assemblyId": `string` ("GRCh38" | "GRCh37" | "NCBI36" | "Other"), // reflective of the assembly id queried for
+> "count": `number`, // this field is only present when performing a COUNT query
+> "start": `number`, // reflective of the provided lowerBound parameter, 0 if none
+> "end": `number`, // reflective of the provided upperBound parameter, 0 if none
+> "chromosome": `string`, // reflective of the chromosome queried for
+> "calls": [ // this field is only present when performing a GET query
> {
-> "filter": `string`,
+> "id": `string`, // variantId
+> "chrom": `string`,
> "pos": `number`,
> "ref": `[]string`, // list of alleles
> "alt": `[]string`, // list of alleles
@@ -277,21 +282,9 @@ Generalized Response Body Structure
> ],
> "format":`string`,
> "qual": `number`,
-> "id": `string`,
-> "sample": {
-> "id": `string`,
-> "variation": {
-> "genotype": {
-> "phased": `boolean`,
-> "alleleLeft": `number`,
-> "alleleRight": `number`,
-> "zygosity": `number` (0 : "Unknown" | 1 : "Heterozygous" | 2 : "HomozygousReference" | 3 : "HomozygousAlternate")
-> },
-> "genotypeProbability": `[]float` | null,
-> "phredScaleLikelyhood": `[]float` | null,
-> }
-> },
-> "fileId": `string` (UUID),
+> "filter": `string`,
+> "sampleId": `string`,
+> "genotype_type": `string ( "HETEROZYGOUS" | "HOMOZYGOUS_REFERENCE" | "HOMOZYGOUS_ALTERNATE" )`,
> "assemblyId": `string` ("GRCh38" | "GRCh37" | "NCBI36" | "Other"),
> },
> ...
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 39e27936..e4fd2732 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -48,6 +48,7 @@ services:
- GOHAN_API_VCF_PATH=${GOHAN_API_CONTAINERIZED_VCF_PATH}
- GOHAN_API_GTF_PATH=${GOHAN_API_CONTAINERIZED_GTF_PATH}
- GOHAN_API_API_DRS_BRIDGE_DIR=${GOHAN_API_API_DRS_BRIDGE_DIR_CONTAINERIZED}
+ - GOHAN_API_BULK_INDEXING_CAP=${GOHAN_API_BULK_INDEXING_CAP}
- GOHAN_API_FILE_PROC_CONC_LVL=${GOHAN_API_FILE_PROC_CONC_LVL}
- GOHAN_API_LINE_PROC_CONC_LVL=${GOHAN_API_LINE_PROC_CONC_LVL}
diff --git a/src/api/models/constants/main.go b/src/api/models/constants/main.go
index a86e3849..eedfecad 100644
--- a/src/api/models/constants/main.go
+++ b/src/api/models/constants/main.go
@@ -1,5 +1,7 @@
package constants
+var VcfHeaders = []string{"chrom", "pos", "id", "ref", "alt", "qual", "filter", "info", "format"}
+
/*
Defines a set of base level
constants and enums to be used
diff --git a/src/api/models/constants/zygosity/main.go b/src/api/models/constants/zygosity/main.go
index fe174494..65d1e470 100644
--- a/src/api/models/constants/zygosity/main.go
+++ b/src/api/models/constants/zygosity/main.go
@@ -14,3 +14,18 @@ const (
func IsKnown(value int) bool {
return value > int(Unknown) && value <= int(HomozygousAlternate)
}
+
+func ZygosityToString(zyg constants.Zygosity) string {
+ switch zyg {
+ case Heterozygous:
+ return "HETEROZYGOUS"
+ case HomozygousAlternate:
+ return "HOMOZYGOUS_ALTERNATE"
+ case HomozygousReference:
+ return "HOMOZYGOUS_REFERENCE"
+ default:
+ return "UNKNOWN"
+ }
+}
+
+// TODO: StringOrIntToZygosity
diff --git a/src/api/models/dtos.go b/src/api/models/dtos.go
deleted file mode 100644
index e57e417e..00000000
--- a/src/api/models/dtos.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package models
-
-import "api/models/constants"
-
-type BentoV2CompatibleVariantsResponseDTO struct {
- Results []BentoV2CompatibleVariantResponseCallsModel `json:"results"`
-}
-type BentoV2CompatibleVariantResponseCallsModel struct {
- AssemblyId constants.AssemblyId `json:"assembly_id"`
- Calls []BentoV2CompatibleVariantResponseDataModel `json:"calls"`
- Chromosome string `json:"chromosome"`
- Start int `json:"start"`
- End int `json:"end"`
-}
-type BentoV2CompatibleVariantResponseDataModel struct {
- SampleId string `json:"sample_id"`
- GenotypeType string `json:"genotype_type"`
-}
-
-// --
-
-type VariantsResponseDTO struct {
- Status int `json:"status"`
- Message string `json:"message"`
- Data []VariantResponseDataModel `json:"data"`
-}
-type VariantResponseDataModel struct {
- VariantId string `json:"variantId"`
- SampleId string `json:"sampleId"`
- Count int `json:"count"`
- Results interface{} `json:"results"` // i.e.: []Variant or []string
-}
-
-// -- --
-
-type GenesResponseDTO struct {
- Status int `json:"status"`
- Message string `json:"message"`
- Term string `json:"term"`
- Count int `json:"count"`
- Results []Gene `json:"results"` // []Gene
-}
diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go
new file mode 100644
index 00000000..1c540dc5
--- /dev/null
+++ b/src/api/models/dtos/main.go
@@ -0,0 +1,65 @@
+package dtos
+
+import (
+ "api/models/constants"
+ "api/models/indexes"
+)
+
+// ---- Variants
+type VariantReponse struct {
+ Status int `json:"status,omitempty"`
+ Message string `json:"message,omitempty"`
+}
+type VariantGetReponse struct {
+ VariantReponse
+ Results []VariantGetResult `json:"results"`
+}
+type VariantCountReponse struct {
+ VariantReponse
+ Results []VariantCountResult `json:"results"`
+}
+
+type VariantResult struct {
+ Query string `json:"query,omitempty"`
+ AssemblyId constants.AssemblyId `json:"assembly_id"`
+ Chromosome string `json:"chromosome"`
+ Start int `json:"start"`
+ End int `json:"end"`
+}
+
+type VariantGetResult struct {
+ VariantResult
+ Calls []VariantCall `json:"calls"`
+}
+type VariantCountResult struct {
+ VariantResult
+ Count int `json:"count"`
+}
+
+type VariantCall struct {
+ Chrom string `json:"chrom,omitempty"`
+ Pos int `json:"pos,omitempty"`
+ Id string `json:"id,omitempty"`
+ Ref []string `json:"ref,omitempty"`
+ Alt []string `json:"alt,omitempty"`
+ Format []string `json:"format,omitempty"`
+ Qual int `json:"qual,omitempty"`
+ Filter string `json:"filter,omitempty"`
+
+ Info []indexes.Info `json:"info,omitempty"` // TODO; refactor?
+
+ SampleId string `json:"sample_id"`
+ GenotypeType string `json:"genotype_type"`
+ // TODO: GenotypeProbability, PhredScaleLikelyhood ?
+
+ AssemblyId constants.AssemblyId `json:"assemblyId,omitempty"`
+}
+
+// -- Genes
+type GenesResponseDTO struct {
+ Status int `json:"status"`
+ Message string `json:"message"`
+ Term string `json:"term"`
+ Count int `json:"count"`
+ Results []indexes.Gene `json:"results"` // []Gene
+}
diff --git a/src/api/models/elasticsearch.go b/src/api/models/indexes/main.go
similarity index 91%
rename from src/api/models/elasticsearch.go
rename to src/api/models/indexes/main.go
index 942af534..3a6780a5 100644
--- a/src/api/models/elasticsearch.go
+++ b/src/api/models/indexes/main.go
@@ -1,11 +1,9 @@
-package models
+package indexes
import (
c "api/models/constants"
)
-var VcfHeaders = []string{"chrom", "pos", "id", "ref", "alt", "qual", "filter", "info", "format"}
-
type Variant struct {
Chrom string `json:"chrom"`
Pos int `json:"pos"`
diff --git a/src/api/models/ingest/structs/main.go b/src/api/models/ingest/structs/main.go
index d6010fd5..7edae19d 100644
--- a/src/api/models/ingest/structs/main.go
+++ b/src/api/models/ingest/structs/main.go
@@ -1,16 +1,16 @@
package structs
import (
- "api/models"
+ "api/models/indexes"
"sync"
)
type IngestionQueueStructure struct {
- Variant *models.Variant
+ Variant *indexes.Variant
WaitGroup *sync.WaitGroup
}
type GeneIngestionQueueStructure struct {
- Gene *models.Gene
+ Gene *indexes.Gene
WaitGroup *sync.WaitGroup
}
diff --git a/src/api/models/config.go b/src/api/models/main.go
similarity index 100%
rename from src/api/models/config.go
rename to src/api/models/main.go
diff --git a/src/api/mvc/genes.go b/src/api/mvc/genes.go
index ef7ff320..f3aa9e93 100644
--- a/src/api/mvc/genes.go
+++ b/src/api/mvc/genes.go
@@ -2,10 +2,10 @@ package mvc
import (
"api/contexts"
- "api/models"
"api/models/constants"
assemblyId "api/models/constants/assembly-id"
"api/models/constants/chromosome"
+ "api/models/dtos"
"api/models/ingest"
"api/models/ingest/structs"
esRepo "api/repositories/elasticsearch"
@@ -22,6 +22,8 @@ import (
"sync"
"time"
+ "api/models/indexes"
+
"github.com/labstack/echo"
"github.com/mitchellh/mapstructure"
)
@@ -268,7 +270,7 @@ func GenesIngest(c echo.Context) error {
return
}
- discoveredGene := &models.Gene{
+ discoveredGene := &indexes.Gene{
Name: geneName,
Chrom: chromosomeClean,
Start: start,
@@ -370,13 +372,13 @@ func GenesGetByNomenclatureWildcard(c echo.Context) error {
mapstructure.Decode(docsHits, &allDocHits)
// grab _source for each hit
- var allSources []models.Gene
+ var allSources []indexes.Gene
for _, r := range allDocHits {
source := r["_source"].(map[string]interface{})
// cast map[string]interface{} to struct
- var resultingVariant models.Gene
+ var resultingVariant indexes.Gene
mapstructure.Decode(source, &resultingVariant)
// accumulate structs
@@ -385,7 +387,7 @@ func GenesGetByNomenclatureWildcard(c echo.Context) error {
fmt.Printf("Found %d docs!\n", len(allSources))
- geneResponseDTO := models.GenesResponseDTO{
+ geneResponseDTO := dtos.GenesResponseDTO{
Term: term,
Count: len(allSources),
Results: allSources,
diff --git a/src/api/mvc/variants.go b/src/api/mvc/variants.go
index ba1b790d..cf8c8e40 100644
--- a/src/api/mvc/variants.go
+++ b/src/api/mvc/variants.go
@@ -16,20 +16,23 @@ import (
"time"
"api/contexts"
- "api/models"
"api/models/constants"
a "api/models/constants/assembly-id"
gq "api/models/constants/genotype-query"
s "api/models/constants/sort"
+ "api/models/dtos"
+ "api/models/indexes"
"api/models/ingest"
esRepo "api/repositories/elasticsearch"
"api/utils"
+ "api/models/constants/zygosity"
+
"github.com/elastic/go-elasticsearch/v7"
"github.com/google/uuid"
+ "github.com/mitchellh/mapstructure"
"github.com/labstack/echo"
- "github.com/mitchellh/mapstructure"
)
func VariantsIngestionStats(c echo.Context) error {
@@ -128,7 +131,6 @@ func VariantsIngest(c echo.Context) error {
}
startTime := time.Now()
-
fmt.Printf("Ingest Start: %s\n", startTime)
// get vcf files
@@ -240,7 +242,14 @@ func VariantsIngest(c echo.Context) error {
// --- decompress vcf.gz
fmt.Printf("Decompressing %s !\n", gzippedFileName)
- gzippedFilePath := fmt.Sprintf("%s%s", vcfPath, gzippedFileName)
+ var separator string
+ if strings.HasPrefix(gzippedFileName, "/") {
+ separator = ""
+ } else {
+ separator = "/"
+ }
+
+ gzippedFilePath := fmt.Sprintf("%s%s%s", vcfPath, separator, gzippedFileName)
r, err := os.Open(gzippedFilePath)
if err != nil {
msg := fmt.Sprintf("error opening %s: %s\n", gzippedFileName, err)
@@ -257,7 +266,7 @@ func VariantsIngest(c echo.Context) error {
// such that DRS can load the file into memory to process rather than receiving
// the file from an upload, thus utilizing it's already-exisiting /private/ingest endpoind
// -----
- tmpDestinationFileName := fmt.Sprintf("%s%s", cfg.Api.BridgeDirectory, gzippedFileName)
+ tmpDestinationFileName := fmt.Sprintf("%s%s%s", cfg.Api.BridgeDirectory, separator, gzippedFileName)
// prepare directory inside bridge directory
partialTmpDir, _ := path.Split(gzippedFileName)
@@ -403,19 +412,20 @@ func VariantsIngest(c echo.Context) error {
defer r.Close()
// --- load vcf into memory and ingest the vcf file into elasticsearch
- fmt.Printf("Begin processing %s !\n", vcfFilePath)
+ beginProcessingTime := time.Now()
+ fmt.Printf("Begin processing %s at [%s]\n", vcfFilePath, beginProcessingTime)
ingestionService.ProcessVcf(vcfFilePath, drsFileId, assemblyId, filterOutHomozygousReferences, cfg.Api.LineProcessingConcurrencyLevel)
+ fmt.Printf("Ingest duration for file at %s : %s\n", vcfFilePath, time.Since(beginProcessingTime))
// --- delete the temporary vcf file
fmt.Printf("Removing temporary file %s !\n", vcfFilePath)
os.Remove(vcfFilePath)
+ fmt.Printf("Removal done!")
// --- delete full tmp path and all contents
// (WARNING : Only do this when running over a single file)
//os.RemoveAll(vcfTmpPath)
- fmt.Printf("Ingest duration for file at %s : %s\n", vcfFilePath, time.Since(startTime))
-
reqStat.State = ingest.Done
ingestionService.IngestRequestChan <- reqStat
}(_fileName, _newRequestState)
@@ -558,15 +568,11 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error
// ---
// prepare response
- var respDTO = make(map[string]interface{})
- respDTO["DataType"] = "variant"
-
+ respDTO := dtos.VariantGetReponse{
+ Results: make([]dtos.VariantGetResult, 0),
+ }
respDTOMux := sync.RWMutex{}
- // initialize length 0 to avoid nil response
- tmpResults := make([]interface{}, 0)
- tmpCalls := []models.BentoV2CompatibleVariantResponseCallsModel{}
-
var errors []error
errorMux := sync.RWMutex{}
@@ -578,17 +584,22 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error
go func(_id string) {
defer wg.Done()
- var variantRespDataModel = make(map[string]interface{})
- // variantRespDataModel := models.VariantResponseDataModel{}
+ variantResult := dtos.VariantGetResult{
+ Calls: make([]dtos.VariantCall, 0),
+ }
var (
docs map[string]interface{}
searchErr error
)
if isVariantIdQuery {
+ fmt.Printf("Executing Get-Variants for VariantId %s\n", _id)
+
+ // only set query string if
+ // 'getSampleIdsOnly' is false
+ // (current support for bentoV2 + bento_federation_service integration)
if !getSampleIdsOnly {
- variantRespDataModel["VariantId"] = _id
- fmt.Printf("Executing Get-Variants for VariantId %s\n", _id)
+ variantResult.Query = fmt.Sprintf("variantId:%s", _id) // TODO: Refactor
}
docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es,
@@ -600,10 +611,15 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error
getSampleIdsOnly)
} else {
// implied sampleId query
- variantRespDataModel["SampleId"] = _id
-
fmt.Printf("Executing Get-Samples for SampleId %s\n", _id)
+ // only set query string if
+ // 'getSampleIdsOnly' is false
+ // (current support for bentoV2 + bento_federation_service integration)
+ if !getSampleIdsOnly {
+ variantResult.Query = fmt.Sprintf("variantId:%s", _id) // TODO: Refactor
+ }
+
docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es,
chromosome, lowerBound, upperBound,
"", _id, // note : "" is for variantId
@@ -619,21 +635,46 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error
return
}
- // query for each id
+ // -- map variant index models to appropriate variant result + call dto models
+ variantResult.AssemblyId = assemblyId
+ variantResult.Chromosome = chromosome
+ variantResult.Start = lowerBound
+ variantResult.End = upperBound
+
+ if getSampleIdsOnly {
+ // gather data from "aggregations"
+ docsBuckets := docs["aggregations"].(map[string]interface{})["sampleIds"].(map[string]interface{})["buckets"]
+ allDocBuckets := []map[string]interface{}{}
+ mapstructure.Decode(docsBuckets, &allDocBuckets)
+
+ for _, r := range allDocBuckets {
+ sampleId := r["key"].(string)
- if !getSampleIdsOnly {
+ // TEMP : re-capitalize sampleIds retrieved from elasticsearch at response time
+ // TODO: touch up elasticsearch ingestion/parsing settings
+ // to not automatically force all sampleIds to lowercase when indexing
+ sampleId = strings.ToUpper(sampleId)
+
+ // accumulate sample Id's
+ variantResult.Calls = append(variantResult.Calls, dtos.VariantCall{
+ SampleId: sampleId,
+ GenotypeType: string(genotype),
+ })
+ }
+ } else {
+ // gather data from "hits"
docsHits := docs["hits"].(map[string]interface{})["hits"]
allDocHits := []map[string]interface{}{}
mapstructure.Decode(docsHits, &allDocHits)
// grab _source for each hit
- var allSources []models.Variant
+ var allSources []indexes.Variant
for _, r := range allDocHits {
source := r["_source"].(map[string]interface{})
// cast map[string]interface{} to struct
- var resultingVariant models.Variant
+ var resultingVariant indexes.Variant
mapstructure.Decode(source, &resultingVariant)
// accumulate structs
@@ -642,37 +683,36 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error
fmt.Printf("Found %d docs!\n", len(allSources))
- variantRespDataModel["Results"] = allSources
-
- respDTOMux.Lock()
- tmpResults = append(tmpResults, variantRespDataModel)
- respDTOMux.Unlock()
- } else {
- // TODO: refactor this 'else' statement
- docsBuckets := docs["aggregations"].(map[string]interface{})["sampleIds"].(map[string]interface{})["buckets"]
- allDocBuckets := []map[string]interface{}{}
- mapstructure.Decode(docsBuckets, &allDocBuckets)
-
- for _, r := range allDocBuckets {
- sampleId := r["key"].(string)
-
- // cast map[string]interface{} to struct
- simplifiedResponse := models.BentoV2CompatibleVariantResponseDataModel{
- SampleId: strings.ToUpper(sampleId),
- GenotypeType: string(genotype),
- }
-
- call := models.BentoV2CompatibleVariantResponseCallsModel{}
- call.AssemblyId = assemblyId
- call.Chromosome = chromosome
- call.Start = lowerBound
- call.End = upperBound
- call.Calls = append(call.Calls, simplifiedResponse)
-
- // accumulate sample Id's
- tmpCalls = append(tmpCalls, call)
+ for _, source := range allSources {
+ // TEMP : re-capitalize sampleIds retrieved from elasticsearch at response time
+ // TODO: touch up elasticsearch ingestion/parsing settings
+ // to not automatically force all sampleIds to lowercase when indexing
+ sampleId := strings.ToUpper(source.Sample.Id)
+
+ variantResult.Calls = append(variantResult.Calls, dtos.VariantCall{
+ Chrom: source.Chrom,
+ Pos: source.Pos,
+ Id: source.Id,
+ Ref: source.Ref,
+ Alt: source.Alt,
+ Format: source.Format,
+ Qual: source.Qual,
+ Filter: source.Filter,
+
+ Info: source.Info,
+
+ SampleId: sampleId,
+ GenotypeType: zygosity.ZygosityToString(source.Sample.Variation.Genotype.Zygosity),
+
+ AssemblyId: source.AssemblyId,
+ })
}
}
+ // --
+
+ respDTOMux.Lock()
+ respDTO.Results = append(respDTO.Results, variantResult)
+ respDTOMux.Unlock()
}(id)
}
@@ -680,33 +720,19 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error
wg.Wait()
if len(errors) == 0 {
- respDTO["Status"] = 200
- respDTO["Message"] = "Success"
- } else {
- respDTO["Status"] = 500
- respDTO["Message"] = "Something went wrong.. Please contact the administrator!"
- }
-
- // cast generic map[string]interface{} to type
- // depending on `getSampleIdsOnly`
- if getSampleIdsOnly {
- respDTO["Results"] = tmpCalls
+ // only set status and message if
+ // 'getSampleIdsOnly' is false
+ // (current support for bentoV2 + bento_federation_service integration)
+ if !getSampleIdsOnly {
+ respDTO.Status = 200
+ respDTO.Message = "Success"
+ }
} else {
- respDTO["Data"] = tmpResults
+ respDTO.Status = 500
+ respDTO.Message = "Something went wrong.. Please contact the administrator!"
}
- // TODO: Refactor
- if getSampleIdsOnly {
- var dto models.BentoV2CompatibleVariantsResponseDTO
- mapstructure.Decode(respDTO, &dto)
-
- return c.JSON(http.StatusOK, dto)
- } else {
- var dto models.VariantsResponseDTO
- mapstructure.Decode(respDTO, &dto)
-
- return c.JSON(http.StatusOK, dto)
- }
+ return c.JSON(http.StatusOK, respDTO)
}
func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) error {
@@ -714,7 +740,9 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro
var es, chromosome, lowerBound, upperBound, reference, alternative, genotype, assemblyId = retrieveCommonElements(c)
- respDTO := models.VariantsResponseDTO{}
+ respDTO := dtos.VariantCountReponse{
+ Results: make([]dtos.VariantCountResult, 0),
+ }
respDTOMux := sync.RWMutex{}
var errors []error
@@ -727,16 +755,15 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro
go func(_id string) {
defer wg.Done()
- variantRespDataModel := models.VariantResponseDataModel{}
+ countResult := dtos.VariantCountResult{}
var (
docs map[string]interface{}
countError error
)
if isVariantIdQuery {
- variantRespDataModel.VariantId = _id
-
fmt.Printf("Executing Count-Variants for VariantId %s\n", _id)
+ countResult.Query = fmt.Sprintf("variantId:%s", _id) // TODO: Refactor
docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es,
chromosome, lowerBound, upperBound,
@@ -744,9 +771,8 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro
reference, alternative, genotype, assemblyId)
} else {
// implied sampleId query
- variantRespDataModel.SampleId = _id
-
fmt.Printf("Executing Count-Samples for SampleId %s\n", _id)
+ countResult.Query = fmt.Sprintf("sampleId:%s", _id) // TODO: Refactor
docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es,
chromosome, lowerBound, upperBound,
@@ -760,11 +786,15 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro
errorMux.Unlock()
return
}
+ countResult.AssemblyId = assemblyId
+ countResult.Chromosome = chromosome
+ countResult.Start = lowerBound
+ countResult.End = upperBound
- variantRespDataModel.Count = int(docs["count"].(float64))
+ countResult.Count = int(docs["count"].(float64))
respDTOMux.Lock()
- respDTO.Data = append(respDTO.Data, variantRespDataModel)
+ respDTO.Results = append(respDTO.Results, countResult)
respDTOMux.Unlock()
}(id)
diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go
index c1e9baff..b2a6ca10 100644
--- a/src/api/services/ingestion.go
+++ b/src/api/services/ingestion.go
@@ -25,8 +25,11 @@ import (
"strconv"
"strings"
"sync"
+ "sync/atomic"
"time"
+ "api/models/indexes"
+
"github.com/Jeffail/gabs"
"github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esutil"
@@ -75,7 +78,7 @@ func NewIngestionService(es *elasticsearch.Client, cfg *models.Config) *Ingestio
Client: iz.ElasticsearchClient,
NumWorkers: numWorkers,
// FlushBytes: int(flushBytes), // The flush threshold in bytes (default: 5MB ?)
- FlushInterval: time.Second, // The periodic flush interval
+ // FlushInterval: time.Second, // The periodic flush interval
})
iz.IngestionBulkIndexer = bi
@@ -83,8 +86,8 @@ func NewIngestionService(es *elasticsearch.Client, cfg *models.Config) *Ingestio
Index: "genes",
Client: iz.ElasticsearchClient,
NumWorkers: numWorkers,
- //FlushBytes: int(64), // The flush threshold in bytes (default: 5MB ?)
- FlushInterval: 3 * time.Second, // The periodic flush interval
+ // FlushBytes: int(64), // The flush threshold in bytes (default: 5MB ?)
+ // FlushInterval: 3 * time.Second, // The periodic flush interval
})
iz.GeneIngestionBulkIndexer = gbi
@@ -96,7 +99,7 @@ func NewIngestionService(es *elasticsearch.Client, cfg *models.Config) *Ingestio
func (i *IngestionService) Init() {
// safeguard to prevent multiple initilizations
if !i.Initialized {
- // spin up a listener for ingest request updates
+ // spin up a listener for both variant and gene ingest request updates
go func() {
for {
select {
@@ -125,7 +128,7 @@ func (i *IngestionService) Init() {
}
}()
- // spin up a listener for each bulk indexing
+ // spin up a listener for both variant and gene bulk indexing
go func() {
for {
select {
@@ -333,6 +336,9 @@ func (i *IngestionService) ProcessVcf(
scanner := bufio.NewScanner(f)
var discoveredHeaders bool = false
var headers []string
+ headerSampleIds := make(map[int]string)
+
+ skippedHomozygousReferencesCount := int32(0)
var _fileWG sync.WaitGroup
@@ -350,6 +356,16 @@ func (i *IngestionService) ProcessVcf(
if strings.Contains(line, "CHROM") {
// Split the string by tabs
headers = strings.Split(line, "\t")
+
+ for id, header := range headers {
+ // determine if header is a default VCF header.
+ // if it is not, assume it's a sampleId and keep
+ // track of it with an id
+ if !utils.StringInSlice(strings.ToLower(strings.TrimSpace(strings.ReplaceAll(header, "#", ""))), constants.VcfHeaders) {
+ headerSampleIds[len(constants.VcfHeaders)-id] = header
+ }
+ }
+
discoveredHeaders = true
fmt.Println("Found the headers: ", headers)
@@ -392,7 +408,7 @@ func (i *IngestionService) ProcessVcf(
value := strings.TrimSpace(rc)
// if not a vcf header, assume it's a sampleId header
- if utils.StringInSlice(key, models.VcfHeaders) {
+ if utils.StringInSlice(key, constants.VcfHeaders) {
// filter field type by column name
if key == "chrom" {
@@ -405,7 +421,7 @@ func (i *IngestionService) ProcessVcf(
tmpVariant[key] = value
tmpVariantMapMutex.Unlock()
} else {
- // TODO: skip this call
+ // skip this call
skipThisCall = true
// redundant?
@@ -447,7 +463,7 @@ func (i *IngestionService) ProcessVcf(
tmpVariant[key] = value
tmpVariantMapMutex.Unlock()
} else if key == "info" {
- var allInfos []*models.Info
+ var allInfos []*indexes.Info
// Split all alleles by semi-colon
semiColonSeparations := strings.Split(value, ";")
@@ -457,12 +473,12 @@ func (i *IngestionService) ProcessVcf(
equalitySeparations := strings.Split(scSep, "=")
if len(equalitySeparations) == 2 {
- allInfos = append(allInfos, &models.Info{
+ allInfos = append(allInfos, &indexes.Info{
Id: equalitySeparations[0],
Value: equalitySeparations[1],
})
} else { // len(equalitySeparations) == 1
- allInfos = append(allInfos, &models.Info{
+ allInfos = append(allInfos, &indexes.Info{
Id: "",
Value: equalitySeparations[0],
})
@@ -479,13 +495,27 @@ func (i *IngestionService) ProcessVcf(
tmpVariantMapMutex.Unlock()
}
} else { // assume its a sampleId header
- tmpSamplesMutex.Lock()
+ allValues := strings.Split(value, ":")
+
+ // ---- filter out homozygous reference calls
+ // support for multi-sampled calls
+ // assume first component of allValues is the genotype
+ genoTypeValue := allValues[0]
+ if filterOutHomozygousReferences && (genoTypeValue == "0|0" || genoTypeValue == "0/0") {
+ // skip adding this sample to the 'tmpSamples' list which
+ // then goes to be further processed into a variant document
+ // increase count of skipped calls
+ atomic.AddInt32(&skippedHomozygousReferencesCount, 1)
+
+ return
+ }
+
+ tmpSamplesMutex.Lock()
tmpSamples = append(tmpSamples, map[string]interface{}{
"key": key,
- "values": strings.Split(value, ":"),
+ "values": allValues,
})
-
tmpSamplesMutex.Unlock()
}
}(rowIndex, rowComponent, &rowWg)
@@ -500,7 +530,7 @@ func (i *IngestionService) ProcessVcf(
}
// --- TODO: prep formats + samples
- var samples []*models.Sample
+ var samples []*indexes.Sample
// ---- get genotype stuff
var (
@@ -530,8 +560,8 @@ func (i *IngestionService) ProcessVcf(
}
for _, ts := range tmpSamples {
- sample := &models.Sample{}
- variation := &models.Variation{}
+ sample := &indexes.Sample{}
+ variation := &indexes.Variation{}
tmpKeyString := ts["key"].(string)
tmpValueStrings := ts["values"].([]string)
@@ -582,7 +612,7 @@ func (i *IngestionService) ProcessVcf(
}
}
- variation.Genotype = models.Genotype{
+ variation.Genotype = indexes.Genotype{
Phased: phased,
Zygosity: zyg,
}
@@ -613,14 +643,6 @@ func (i *IngestionService) ProcessVcf(
sample.Id = tmpKeyString
sample.Variation = *variation
- // ---- filter out homozygous reference calls
- // TODO: determine if this is the most optimal place
- // to perform this verification
- if filterOutHomozygousReferences &&
- sample.Variation.Genotype.Zygosity == z.HomozygousReference {
- continue
- }
-
samples = append(samples, sample)
}
@@ -629,12 +651,17 @@ func (i *IngestionService) ProcessVcf(
// references, and thus maybe all samples from the call
// [i.e. if this is a single-sample VCF])
if len(samples) > 0 {
+
+ // for multi-sample vcfs, add 1 to the waitgroup for
+ // each sample (minus 1 given the initial addition)
+ fileWg.Add(len(samples) - 1)
+
// Create a whole variant document for each sample found on this VCF line
// TODO: revisit this model as it is surely not storage efficient
for _, sample := range samples {
tmpVariant["sample"] = sample
// --- push to a bulk "queue"
- var resultingVariant models.Variant
+ var resultingVariant indexes.Variant
mapstructure.Decode(tmpVariant, &resultingVariant)
// pass variant (along with a waitgroup) to the channel
@@ -658,7 +685,8 @@ func (i *IngestionService) ProcessVcf(
// let all lines be queued up and processed
_fileWG.Wait()
- fmt.Printf("File %s waited for and complete!\n", vcfFilePath)
+
+ fmt.Printf("File %s waited for and complete!\n\t- Number of skipped Homozygous Reference calls: %d\n", vcfFilePath, skippedHomozygousReferencesCount)
}
func (i *IngestionService) FilenameAlreadyRunning(filename string) bool {
diff --git a/src/tests/integration/api/api_capacity_test.go b/src/tests/integration/api/api_capacity_test.go
index affc129c..87680dc0 100644
--- a/src/tests/integration/api/api_capacity_test.go
+++ b/src/tests/integration/api/api_capacity_test.go
@@ -1,84 +1,84 @@
package api
-import (
- "api/models"
- "encoding/json"
- "fmt"
- "io/ioutil"
- "net/http"
- "regexp"
- "sync"
- "testing"
- common "tests/common"
-
- "github.com/stretchr/testify/assert"
-)
-
-func TestCanHandleIngestAllLocalVcfs(_t *testing.T) {
- cfg := common.InitConfig()
-
- var vcfGzfiles []string
-
- fileInfo, err := ioutil.ReadDir(cfg.Api.LocalVcfPath)
- if err != nil {
- fmt.Printf("Failed: %s\n", err)
- }
-
- // Filter only .vcf.gz files
- for _, file := range fileInfo {
- if matched, _ := regexp.MatchString(".vcf.gz", file.Name()); matched {
- vcfGzfiles = append(vcfGzfiles, file.Name())
- } else {
- fmt.Printf("Skipping %s\n", file.Name())
- }
- }
-
- var combWg sync.WaitGroup
- simultaneousRequestsCapacity := 1 // TODO: tweak
- simultaneousRequestsQueue := make(chan bool, simultaneousRequestsCapacity)
-
- // fire and forget
- for _, vcfgz := range vcfGzfiles {
- simultaneousRequestsQueue <- true
- combWg.Add(1)
- go func(_vcfgz string, _combWg *sync.WaitGroup) {
- defer _combWg.Done()
- defer func() { <-simultaneousRequestsQueue }()
-
- url := cfg.Api.Url + fmt.Sprintf("/variants/ingestion/run?fileNames=%s&assemblyId=GRCH37&filterOutHomozygousReferences=true", _vcfgz)
-
- fmt.Printf("Calling %s\n", url)
- request, _ := http.NewRequest("GET", url, nil)
-
- client := &http.Client{}
- response, responseErr := client.Do(request)
- assert.Nil(_t, responseErr)
-
- defer response.Body.Close()
-
- // this test (at the time of writing) will only work if authorization is disabled
- shouldBe := 200
- assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe))
-
- // -- interpret array of ingestion requests from response
- respBody, respBodyErr := ioutil.ReadAll(response.Body)
- assert.Nil(_t, respBodyErr)
-
- // --- transform body bytes to string
- respBodyString := string(respBody)
-
- // -- convert to json and check for error
- var respDto models.VariantsResponseDTO
- jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto)
- assert.Nil(_t, jsonUnmarshallingError)
-
- }(vcfgz, &combWg)
- }
-
- // allowing all lines to be queued up and waited for
- for i := 0; i < simultaneousRequestsCapacity; i++ {
- simultaneousRequestsQueue <- true
- }
-
- combWg.Wait()
-}
+// import (
+// "api/models/dtos"
+// "encoding/json"
+// "fmt"
+// "io/ioutil"
+// "net/http"
+// "regexp"
+// "sync"
+// "testing"
+// common "tests/common"
+
+// "github.com/stretchr/testify/assert"
+// )
+
+// func TestCanHandleIngestAllLocalVcfs(_t *testing.T) {
+// cfg := common.InitConfig()
+
+// var vcfGzfiles []string
+
+// fileInfo, err := ioutil.ReadDir(cfg.Api.LocalVcfPath)
+// if err != nil {
+// fmt.Printf("Failed: %s\n", err)
+// }
+
+// // Filter only .vcf.gz files
+// for _, file := range fileInfo {
+// if matched, _ := regexp.MatchString(".vcf.gz", file.Name()); matched {
+// vcfGzfiles = append(vcfGzfiles, file.Name())
+// } else {
+// fmt.Printf("Skipping %s\n", file.Name())
+// }
+// }
+
+// var combWg sync.WaitGroup
+// simultaneousRequestsCapacity := 1 // TODO: tweak
+// simultaneousRequestsQueue := make(chan bool, simultaneousRequestsCapacity)
+
+// // fire and forget
+// for _, vcfgz := range vcfGzfiles {
+// simultaneousRequestsQueue <- true
+// combWg.Add(1)
+// go func(_vcfgz string, _combWg *sync.WaitGroup) {
+// defer _combWg.Done()
+// defer func() { <-simultaneousRequestsQueue }()
+
+// url := cfg.Api.Url + fmt.Sprintf("/variants/ingestion/run?fileNames=%s&assemblyId=GRCH37&filterOutHomozygousReferences=true", _vcfgz)
+
+// fmt.Printf("Calling %s\n", url)
+// request, _ := http.NewRequest("GET", url, nil)
+
+// client := &http.Client{}
+// response, responseErr := client.Do(request)
+// assert.Nil(_t, responseErr)
+
+// defer response.Body.Close()
+
+// // this test (at the time of writing) will only work if authorization is disabled
+// shouldBe := 200
+// assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe))
+
+// // -- interpret array of ingestion requests from response
+// respBody, respBodyErr := ioutil.ReadAll(response.Body)
+// assert.Nil(_t, respBodyErr)
+
+// // --- transform body bytes to string
+// respBodyString := string(respBody)
+
+// // -- convert to json and check for error
+// var respDto dtos.VariantReponse
+// jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto)
+// assert.Nil(_t, jsonUnmarshallingError)
+
+// }(vcfgz, &combWg)
+// }
+
+// // allowing all lines to be queued up and waited for
+// for i := 0; i < simultaneousRequestsCapacity; i++ {
+// simultaneousRequestsQueue <- true
+// }
+
+// combWg.Wait()
+// }
diff --git a/src/tests/integration/api/api_gene_test.go b/src/tests/integration/api/api_gene_test.go
index 103f3bc2..84707c84 100644
--- a/src/tests/integration/api/api_gene_test.go
+++ b/src/tests/integration/api/api_gene_test.go
@@ -5,6 +5,8 @@ import (
c "api/models/constants"
a "api/models/constants/assembly-id"
"api/models/constants/chromosome"
+ "api/models/dtos"
+ "api/models/indexes"
"encoding/json"
"fmt"
"io/ioutil"
@@ -37,12 +39,12 @@ func TestCanGetGenesByAssemblyIdAndChromosome(t *testing.T) {
// assert the dto response slice is plentiful
assert.NotNil(t, allDtoResponses)
- From(allDtoResponses).ForEachT(func(dto models.GenesResponseDTO) {
+ From(allDtoResponses).ForEachT(func(dto dtos.GenesResponseDTO) {
// ensure there are results in the response
assert.NotNil(t, dto.Results)
// check the resulting data
- From(dto.Results).ForEachT(func(gene models.Gene) {
+ From(dto.Results).ForEachT(func(gene indexes.Gene) {
// ensure the gene is legit
assert.NotNil(t, gene.Name)
assert.NotNil(t, gene.AssemblyId)
@@ -52,7 +54,7 @@ func TestCanGetGenesByAssemblyIdAndChromosome(t *testing.T) {
})
}
-func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []models.GenesResponseDTO {
+func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []dtos.GenesResponseDTO {
cfg := common.InitConfig()
// retrieve the overview
@@ -64,7 +66,7 @@ func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []model
// initialize a common slice in which to
// accumulate al responses asynchronously
- allDtoResponses := []models.GenesResponseDTO{}
+ allDtoResponses := []dtos.GenesResponseDTO{}
allDtoResponsesMux := sync.RWMutex{}
var combWg sync.WaitGroup
@@ -143,7 +145,7 @@ func getGenesOverview(_t *testing.T, _cfg *models.Config) map[string]interface{}
return overviewRespJson
}
-func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c.AssemblyId, _t *testing.T, _cfg *models.Config) models.GenesResponseDTO {
+func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c.AssemblyId, _t *testing.T, _cfg *models.Config) dtos.GenesResponseDTO {
queryString := fmt.Sprintf("?chromosome=%s&assemblyId=%s", chromosome, assemblyId)
@@ -152,7 +154,7 @@ func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c.
return getGetGenesCall(url, _t)
}
-func getGetGenesCall(url string, _t *testing.T) models.GenesResponseDTO {
+func getGetGenesCall(url string, _t *testing.T) dtos.GenesResponseDTO {
fmt.Printf("Calling %s\n", url)
request, _ := http.NewRequest("GET", url, nil)
@@ -174,7 +176,7 @@ func getGetGenesCall(url string, _t *testing.T) models.GenesResponseDTO {
respBodyString := string(respBody)
// -- convert to json and check for error
- var respDto models.GenesResponseDTO
+ var respDto dtos.GenesResponseDTO
jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto)
assert.Nil(_t, jsonUnmarshallingError)
diff --git a/src/tests/integration/api/api_variant_test.go b/src/tests/integration/api/api_variant_test.go
index 935b98d0..e4044a79 100644
--- a/src/tests/integration/api/api_variant_test.go
+++ b/src/tests/integration/api/api_variant_test.go
@@ -7,6 +7,8 @@ import (
gq "api/models/constants/genotype-query"
s "api/models/constants/sort"
z "api/models/constants/zygosity"
+ "api/models/dtos"
+ "api/models/indexes"
"encoding/json"
"fmt"
"io/ioutil"
@@ -96,8 +98,8 @@ func TestCanGetVariantsWithoutInfoInResultset(t *testing.T) {
// assert that all responses from all combinations have no results
for _, dtoResponse := range allDtoResponses {
- firstDataPointResults := dtoResponse.Data[0].Results
- assert.Nil(t, firstDataPointResults[0].Info)
+ firstDataPointCalls := dtoResponse.Results[0].Calls
+ assert.Nil(t, firstDataPointCalls[0].Info)
}
}
@@ -109,17 +111,17 @@ func TestCanGetVariantsWithInfoInResultset(t *testing.T) {
// - * accumulate all infos into a single list using the set of
// SelectManyT's and the SelectT
// - ** iterate over each info in the ForEachT
- var accumulatedInfos []*models.Info
+ var accumulatedInfos []*indexes.Info
- From(allDtoResponses).SelectManyT(func(resp models.VariantsResponseDTO) Query { // *
- return From(resp.Data)
- }).SelectManyT(func(data models.VariantResponseDataModel) Query {
- return From(data.Results)
- }).SelectManyT(func(variant models.Variant) Query {
+ From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // *
+ return From(resp.Results)
+ }).SelectManyT(func(data dtos.VariantGetResult) Query {
+ return From(data.Calls)
+ }).SelectManyT(func(variant dtos.VariantCall) Query {
return From(variant.Info)
- }).SelectT(func(info models.Info) models.Info {
+ }).SelectT(func(info indexes.Info) indexes.Info {
return info
- }).ForEachT(func(info models.Info) { // **
+ }).ForEachT(func(info indexes.Info) { // **
accumulatedInfos = append(accumulatedInfos, &info)
})
@@ -140,17 +142,17 @@ func TestCanGetVariantsInAscendingPositionOrder(t *testing.T) {
// assert the dto response slice is plentiful
assert.NotNil(t, allDtoResponses)
- From(allDtoResponses).ForEachT(func(dto models.VariantsResponseDTO) {
+ From(allDtoResponses).ForEachT(func(dto dtos.VariantGetReponse) {
// ensure there is data
- assert.NotNil(t, dto.Data)
+ assert.NotNil(t, dto.Results)
// check the data
- From(dto.Data).ForEachT(func(d models.VariantResponseDataModel) {
+ From(dto.Results).ForEachT(func(d dtos.VariantGetResult) {
// ensure the variants slice is plentiful
- assert.NotNil(t, d.Results)
+ assert.NotNil(t, d.Calls)
latestSmallest := 0
- From(d.Results).ForEachT(func(dd models.Variant) {
+ From(d.Calls).ForEachT(func(dd dtos.VariantCall) {
// verify order
if latestSmallest != 0 {
assert.True(t, latestSmallest <= dd.Pos)
@@ -169,17 +171,17 @@ func TestCanGetVariantsInDescendingPositionOrder(t *testing.T) {
// assert the dto response slice is plentiful
assert.NotNil(t, allDtoResponses)
- From(allDtoResponses).ForEachT(func(dto models.VariantsResponseDTO) {
+ From(allDtoResponses).ForEachT(func(dto dtos.VariantGetReponse) {
// ensure there is data
- assert.NotNil(t, dto.Data)
+ assert.NotNil(t, dto.Results)
// check the data
- From(dto.Data).ForEachT(func(d models.VariantResponseDataModel) {
+ From(dto.Results).ForEachT(func(d dtos.VariantGetResult) {
// ensure the variants slice is plentiful
- assert.NotNil(t, d.Results)
+ assert.NotNil(t, d.Calls)
latestGreatest := 0
- From(d.Results).ForEachT(func(dd models.Variant) {
+ From(d.Calls).ForEachT(func(dd dtos.VariantCall) {
if latestGreatest != 0 {
assert.True(t, latestGreatest >= dd.Pos)
}
@@ -207,14 +209,14 @@ func TestCanGetHomozygousAlternateSamples(t *testing.T) {
func TestCanGetHomozygousAlternateVariantsWithVariousReferences(t *testing.T) {
// setup
- specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) {
+ specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) {
// ensure test is formatted correctly
assert.True(__t, alternativeAllelePattern == "")
// validate variant
- assert.Contains(__t, variant.Ref, referenceAllelePattern)
+ assert.Contains(__t, call.Ref, referenceAllelePattern)
- validateHomozygousAlternateSample(__t, &variant.Sample)
+ validateHomozygousAlternateSample(__t, call)
}
executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Reference, specificValidation)
@@ -222,14 +224,14 @@ func TestCanGetHomozygousAlternateVariantsWithVariousReferences(t *testing.T) {
func TestCanGetHomozygousReferenceVariantsWithVariousReferences(t *testing.T) {
// setup
- specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) {
+ specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) {
// ensure test is formatted correctly
assert.True(__t, alternativeAllelePattern == "")
// validate variant
- assert.Contains(__t, variant.Ref, referenceAllelePattern)
+ assert.Contains(__t, call.Ref, referenceAllelePattern)
- validateHomozygousReferenceSample(__t, &variant.Sample)
+ validateHomozygousReferenceSample(__t, call)
}
executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Reference, specificValidation)
@@ -237,14 +239,14 @@ func TestCanGetHomozygousReferenceVariantsWithVariousReferences(t *testing.T) {
func TestCanGetHeterozygousVariantsWithVariousReferences(t *testing.T) {
// setup
- specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) {
+ specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) {
// ensure test is formatted correctly
assert.True(__t, alternativeAllelePattern == "")
// validate variant
- assert.Contains(__t, variant.Ref, referenceAllelePattern)
+ assert.Contains(__t, call.Ref, referenceAllelePattern)
- validateHeterozygousSample(__t, &variant.Sample)
+ validateHeterozygousSample(__t, call)
}
// trigger
@@ -253,14 +255,14 @@ func TestCanGetHeterozygousVariantsWithVariousReferences(t *testing.T) {
func TestCanGetHomozygousAlternateVariantsWithVariousAlternatives(t *testing.T) {
// setup
- specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) {
+ specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) {
// ensure test is formatted correctly
assert.True(__t, referenceAllelePattern == "")
// validate variant
- assert.Contains(__t, variant.Alt, alternativeAllelePattern)
+ assert.Contains(__t, call.Alt, alternativeAllelePattern)
- validateHomozygousAlternateSample(__t, &variant.Sample)
+ validateHomozygousAlternateSample(__t, call)
}
// trigger
@@ -269,14 +271,14 @@ func TestCanGetHomozygousAlternateVariantsWithVariousAlternatives(t *testing.T)
func TestCanGetHomozygousReferenceVariantsWithVariousAlternatives(t *testing.T) {
// setup
- specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) {
+ specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) {
// ensure test is formatted correctly
assert.True(__t, referenceAllelePattern == "")
// validate variant
- assert.Contains(__t, variant.Alt, alternativeAllelePattern)
+ assert.Contains(__t, call.Alt, alternativeAllelePattern)
- validateHomozygousReferenceSample(__t, &variant.Sample)
+ validateHomozygousReferenceSample(__t, call)
}
// trigger
@@ -285,14 +287,14 @@ func TestCanGetHomozygousReferenceVariantsWithVariousAlternatives(t *testing.T)
func TestCanGetHeterozygousVariantsWithVariousAlternatives(t *testing.T) {
// setup
- specificValidation := func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string) {
+ specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) {
// ensure test is formatted correctly
assert.True(__t, referenceAllelePattern == "")
// validate variant
- assert.Contains(__t, variant.Alt, alternativeAllelePattern)
+ assert.Contains(__t, call.Alt, alternativeAllelePattern)
- validateHeterozygousSample(__t, &variant.Sample)
+ validateHeterozygousSample(__t, call)
}
// trigger
@@ -302,7 +304,7 @@ func TestCanGetHeterozygousVariantsWithVariousAlternatives(t *testing.T) {
// -- Common utility functions for api tests
func executeReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T,
genotypeQuery c.GenotypeQuery, refAltTestType testConsts.ReferenceAlternativeTestType,
- specificValidation func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string)) {
+ specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) {
// TODO: use some kind of Allele Enum
patterns := []string{"A", "C", "T", "G"}
@@ -329,70 +331,66 @@ func executeReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T,
func runAndValidateReferenceOrAlternativeQueryResults(_t *testing.T,
genotypeQuery c.GenotypeQuery,
referenceAllelePattern string, alternativeAllelePattern string,
- specificValidation func(__t *testing.T, variant *models.Variant, referenceAllelePattern string, alternativeAllelePattern string)) {
+ specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) {
allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, referenceAllelePattern, alternativeAllelePattern)
// assert that all of the responses include sample sets with the appropriate zygosity
// - * accumulate all variants into a single list using the set of SelectManyT's and the SelectT
// - ** iterate over each variant in the ForEachT
- var accumulatedVariants []*models.Variant
-
- From(allDtoResponses).SelectManyT(func(resp models.VariantsResponseDTO) Query { // *
- return From(resp.Data)
- }).SelectManyT(func(data models.VariantResponseDataModel) Query {
- return From(data.Results)
- }).SelectT(func(variant models.Variant) models.Variant {
- return variant
- }).ForEachT(func(variant models.Variant) { // **
- accumulatedVariants = append(accumulatedVariants, &variant)
+ // var accumulatedVariants []*indexes.Variant
+ var accumulatedCalls []*dtos.VariantCall
+
+ From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // *
+ return From(resp.Results)
+ }).SelectManyT(func(data dtos.VariantGetResult) Query {
+ return From(data.Calls)
+ }).ForEachT(func(call dtos.VariantCall) { // **
+ accumulatedCalls = append(accumulatedCalls, &call)
})
- if len(accumulatedVariants) == 0 {
+ if len(accumulatedCalls) == 0 {
_t.Skip(fmt.Sprintf("No variants returned for patterns ref: '%s', alt: '%s'! Skipping --", referenceAllelePattern, alternativeAllelePattern))
}
- for _, v := range accumulatedVariants {
+ for _, v := range accumulatedCalls {
assert.NotNil(_t, v.Id)
specificValidation(_t, v, referenceAllelePattern, alternativeAllelePattern)
}
}
-func runAndValidateGenotypeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQuery, specificValidation func(__t *testing.T, sample *models.Sample)) {
+func runAndValidateGenotypeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQuery, specificValidation func(__t *testing.T, call *dtos.VariantCall)) {
allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, "", "")
// assert that all of the responses include heterozygous sample sets
// - * accumulate all samples into a single list using the set of SelectManyT's and the SelectT
// - ** iterate over each sample in the ForEachT
- var accumulatedSamples []*models.Sample
-
- From(allDtoResponses).SelectManyT(func(resp models.VariantsResponseDTO) Query { // *
- return From(resp.Data)
- }).SelectManyT(func(data models.VariantResponseDataModel) Query {
- return From(data.Results)
- }).SelectT(func(variant models.Variant) models.Sample {
- return variant.Sample
- }).ForEachT(func(sample models.Sample) { // **
- accumulatedSamples = append(accumulatedSamples, &sample)
+ // var accumulatedSamples []*indexes.Sample
+ var accumulatedCalls []*dtos.VariantCall
+
+ From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // *
+ return From(resp.Results)
+ }).SelectManyT(func(data dtos.VariantGetResult) Query {
+ return From(data.Calls)
+ }).ForEachT(func(call dtos.VariantCall) { // **
+ accumulatedCalls = append(accumulatedCalls, &call)
})
- if len(accumulatedSamples) == 0 {
+ if len(accumulatedCalls) == 0 {
_t.Skip("No samples returned! Skipping --")
}
- for _, s := range accumulatedSamples {
- assert.NotEmpty(_t, s.Id)
- assert.NotEmpty(_t, s.Variation)
- assert.NotEmpty(_t, s.Variation.Genotype)
- assert.NotEmpty(_t, s.Variation.Genotype.Zygosity)
+ for _, c := range accumulatedCalls {
+ assert.NotEmpty(_t, c.SampleId)
+ assert.NotEmpty(_t, c.GenotypeType)
- specificValidation(_t, s)
+ specificValidation(_t, c)
}
}
-func buildQueryAndMakeGetVariantsCall(chromosome string, sampleId string, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, referenceAllelePattern string, alternativeAllelePattern string, _t *testing.T, _cfg *models.Config) models.VariantsResponseDTO {
+func buildQueryAndMakeGetVariantsCall(chromosome string, sampleId string, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, referenceAllelePattern string, alternativeAllelePattern string, _t *testing.T, _cfg *models.Config) dtos.VariantGetReponse {
queryString := fmt.Sprintf("?chromosome=%s&ids=%s&includeInfoInResultSet=%t&sortByPosition=%s&assemblyId=%s", chromosome, sampleId, includeInfo, sortByPosition, assemblyId)
@@ -467,7 +465,7 @@ func getOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct
return allCombinations
}
-func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []models.VariantsResponseDTO {
+func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []dtos.VariantGetReponse {
cfg := common.InitConfig()
// retrieve the overview
@@ -483,7 +481,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc
// initialize a common slice in which to
// accumulate al responses asynchronously
- allDtoResponses := []models.VariantsResponseDTO{}
+ allDtoResponses := []dtos.VariantGetReponse{}
allDtoResponsesMux := sync.RWMutex{}
var combWg sync.WaitGroup
@@ -499,7 +497,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc
// make the call
dto := buildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, _t, cfg)
- assert.Equal(_t, 1, len(dto.Data))
+ assert.Equal(_t, 1, len(dto.Results))
// accumulate all response objects
// to a common slice in an
@@ -515,7 +513,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc
return allDtoResponses
}
-func makeGetVariantsCall(url string, _t *testing.T) models.VariantsResponseDTO {
+func makeGetVariantsCall(url string, _t *testing.T) dtos.VariantGetReponse {
fmt.Printf("Calling %s\n", url)
request, _ := http.NewRequest("GET", url, nil)
@@ -537,7 +535,7 @@ func makeGetVariantsCall(url string, _t *testing.T) models.VariantsResponseDTO {
respBodyString := string(respBody)
// -- convert to json and check for error
- var respDto models.VariantsResponseDTO
+ var respDto dtos.VariantGetReponse
jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto)
assert.Nil(_t, jsonUnmarshallingError)
@@ -545,16 +543,19 @@ func makeGetVariantsCall(url string, _t *testing.T) models.VariantsResponseDTO {
}
// --- sample validation
-func validateHeterozygousSample(__t *testing.T, sample *models.Sample) {
- assert.True(__t, sample.Variation.Genotype.Zygosity == z.Heterozygous)
+func validateHeterozygousSample(__t *testing.T, call *dtos.VariantCall) {
+ // assert.True(__t, sample.Variation.Genotype.Zygosity == z.Heterozygous)
+ assert.True(__t, call.GenotypeType == z.ZygosityToString(z.Heterozygous))
}
-func validateHomozygousReferenceSample(__t *testing.T, sample *models.Sample) {
- assert.True(__t, sample.Variation.Genotype.Zygosity == z.HomozygousReference)
+func validateHomozygousReferenceSample(__t *testing.T, call *dtos.VariantCall) {
+ // assert.True(__t, sample.Variation.Genotype.Zygosity == z.HomozygousReference)
+ assert.True(__t, call.GenotypeType == z.ZygosityToString(z.HomozygousReference))
}
-func validateHomozygousAlternateSample(__t *testing.T, sample *models.Sample) {
- assert.True(__t, sample.Variation.Genotype.Zygosity == z.HomozygousAlternate)
+func validateHomozygousAlternateSample(__t *testing.T, call *dtos.VariantCall) {
+ // assert.True(__t, sample.Variation.Genotype.Zygosity == z.HomozygousAlternate)
+ assert.True(__t, call.GenotypeType == z.ZygosityToString(z.HomozygousAlternate))
}
// --