Skip to content

Commit

Permalink
Hubs (#1445)
Browse files Browse the repository at this point in the history
Add support for UCSC track hubs. Includes readers for twobit, UCSC B+ tree, and trix files. This also includes a complete rewrite of the bigbed/bigwig readers and a refactoring of the "Genome"-related classes.
  • Loading branch information
jrobinso authored Nov 27, 2023
1 parent 300ab06 commit bcdd371
Show file tree
Hide file tree
Showing 144 changed files with 18,573 additions and 2,501 deletions.
1 change: 1 addition & 0 deletions src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@
requires software.amazon.awssdk.http;
requires software.amazon.awssdk.utils;
requires com.fasterxml.jackson.core;
requires jide.oss;
}
4 changes: 2 additions & 2 deletions src/main/java/org/broad/igv/bbfile/BBDataSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -322,11 +322,11 @@ protected synchronized DataTile getRawData(String chr, int start, int end) {

private List<LocusScore> getWholeGenomeScores() {

if (genome.getHomeChromosome().equals(Globals.CHR_ALL) && windowFunction != WindowFunction.none) {
if (windowFunction != WindowFunction.none) {

if (wholeGenomeScores.get(windowFunction) == null) {

double scale = genome.getNominalLength() / screenWidth;
double scale = genome.getWGLength() / screenWidth;

int maxChromId = reader.getChromosomeNames().size() - 1;
String firstChr = reader.getChromsomeFromId(0);
Expand Down
42 changes: 21 additions & 21 deletions src/main/java/org/broad/igv/bbfile/BBFileHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
**/
public class BBFileHeader {

private static Logger log = LogManager.getLogger(BBFileHeader.class);
public static Logger log = LogManager.getLogger(BBFileHeader.class);

// defines bigBed/bigwig Header Format types
static public final int BBFILE_HEADER_SIZE = 64;
Expand All @@ -62,31 +62,31 @@ public class BBFileHeader {
static public final int BIGBED_MAGIC_HTL = 0xEBF28987; // BigBed Magic High to Low

// defines the bigBed/bigWig source file access
private String path; // bigBed file/pathname
private SeekableStream fis; // BBFile I/O stream handle
private long fileHeaderOffset; // file offset for file header
public String path; // bigBed file/pathname
public SeekableStream fis; // BBFile I/O stream handle
public long fileHeaderOffset; // file offset for file header

private boolean isHeaderOK; // File header read correctly?
private boolean isLowToHigh; // flag indicates values represented low to high bytes
private boolean isBigBed; // flag indicates file is BigBed format
private boolean isBigWig; // flag indicates file is BigWig format;
public boolean isHeaderOK; // File header read correctly?
public boolean isLowToHigh; // flag indicates values represented low to high bytes
public boolean isBigBed; // flag indicates file is BigBed format
public boolean isBigWig; // flag indicates file is BigWig format;

// BBFile Header items - Table C:
// mMagic number (4 bytes) indicates file type and byte order :
// 0x888FFC26 for bigWig, little endian if swapped
// 0x8789F2EB for bigBed, little endian if swapped
private int magic; // 4 byte mMagic Number
private int version; // 2 byte version ID; currently 3
private int nZoomLevels; // 2 byte count of zoom summary levels
private long chromTreeOffset; // 8 byte offset to mChromosome B+ Tree index
private long fullDataOffset; // 8 byte offset to unzoomed data dataCount
private long fullIndexOffset; // 8 byte offset to R+ Tree index of items
private int fieldCount; // 2 byte number of fields in bed. (0 for bigWig)
private int definedFieldCount; // 2 byte number of fields that are bed fields
private long autoSqlOffset; // 8 byte offset to 0 terminated string with .as spec
private long totalSummaryOffset; // 8 byte offset to file summary data block
private int uncompressBuffSize; // 4 byte maximum size for decompressed buffer
private long reserved; // 8 bytes reserved for future expansion. Currently 0
public int magic; // 4 byte mMagic Number
public int version; // 2 byte version ID; currently 3
public int nZoomLevels; // 2 byte count of zoom summary levels
public long chromTreeOffset; // 8 byte offset to mChromosome B+ Tree index
public long fullDataOffset; // 8 byte offset to unzoomed data dataCount
public long fullIndexOffset; // 8 byte offset to R+ Tree index of items
public int fieldCount; // 2 byte number of fields in bed. (0 for bigWig)
public int definedFieldCount; // 2 byte number of fields that are bed fields
public long autoSqlOffset; // 8 byte offset to 0 terminated string with .as spec
public long totalSummaryOffset; // 8 byte offset to file summary data block
public int uncompressBuffSize; // 4 byte maximum size for decompressed buffer
public long reserved; // 8 bytes reserved for future expansion. Currently 0

// constructor reads BBFile header from an input stream
public BBFileHeader(String path, SeekableStream fis, long fileOffset) {
Expand Down Expand Up @@ -208,7 +208,7 @@ else if (isBigBed())
* Success status flag is true for successfully read header,
* or is false for a read error.
**/
private boolean readBBFileHeader(long fileOffset) {
public boolean readBBFileHeader(long fileOffset) {

BBFileHeader bbHeader = null;
LittleEndianInputStream lbdis = null;
Expand Down
1 change: 0 additions & 1 deletion src/main/java/org/broad/igv/bbfile/BBFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ public BBFileReader(String path) throws IOException {
// read in file header
fileOffset = BBFILE_HEADER_OFFSET;
fileHeader = new BBFileHeader(path, fis, fileOffset);
//fileHeader.print();

if (!fileHeader.isHeaderOK()) {
log.error("BBFile header is unrecognized type, header magic = " + fileHeader.getMagic());
Expand Down
13 changes: 6 additions & 7 deletions src/main/java/org/broad/igv/bbfile/codecs/BBBedCodec.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
package org.broad.igv.bbfile.codecs;

import org.broad.igv.Globals;
import org.broad.igv.bbfile.BBUtils;
import org.broad.igv.ucsc.bb.BBUtils;
import org.broad.igv.bbfile.BedData;
import org.broad.igv.feature.*;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.feature.tribble.IGVBEDCodec;

import java.util.LinkedHashMap;
Expand All @@ -25,13 +24,13 @@ public BBBedCodec(int standardFieldCount, BBUtils.ASTable autosql) {
this.igvBedCodec = new IGVBEDCodec(); // Backing "tribble" codec
}

public BasicFeature decode(BedData feat) {
public BasicFeature decode(BedData bedData) {

String[] restOfFields = Globals.tabPattern.split(feat.getRestOfFields(), -1);
String[] restOfFields = Globals.tabPattern.split(bedData.getRestOfFields(), -1);
String[] tokens = new String[this.standardFieldCount];
tokens[0] = feat.getChromosome();
tokens[1] = String.valueOf(feat.getStartBase());
tokens[2] = String.valueOf(feat.getEndBase());
tokens[0] = bedData.getChromosome();
tokens[1] = String.valueOf(bedData.getStartBase());
tokens[2] = String.valueOf(bedData.getEndBase());

System.arraycopy(restOfFields, 0, tokens, 3, this.standardFieldCount - 3);
BasicFeature feature = igvBedCodec.decode(tokens);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
package org.broad.igv.bbfile.codecs;

import org.broad.igv.bbfile.BBUtils;

import java.util.Collections;
import java.util.List;
import org.broad.igv.ucsc.bb.BBUtils;

public class BBCodecFactory {

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/broad/igv/bbfile/codecs/BBRmskCodec.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.broad.igv.bbfile.codecs;

import org.broad.igv.Globals;
import org.broad.igv.bbfile.BBUtils;
import org.broad.igv.ucsc.bb.BBUtils;
import org.broad.igv.bbfile.BedData;
import org.broad.igv.feature.BasicFeature;
import org.broad.igv.feature.Exon;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/broad/igv/data/AbstractDataSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public AbstractDataSource(Genome genome) {

public int getChrLength(String chr) {
if (chr.equals(Globals.CHR_ALL)) {
return (int) (genome.getNominalLength() / 1000);
return (int) (genome.getWGLength() / 1000);
} else {
Chromosome c = genome.getChromosome(chr);
return c == null ? 0 : c.getLength();
Expand Down
1 change: 0 additions & 1 deletion src/main/java/org/broad/igv/data/DatasetDataSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ public DatasetDataSource(String trackId, Dataset dataset, Genome genome) {
this.trackId = trackId;
this.dataset = dataset;

// TODO -- remove this "instanceof" hack
if (genome != null && genome.getHomeChromosome() != null) {
if (genome.getHomeChromosome().equals(Globals.CHR_ALL)) {
if (dataset instanceof IGVDataset) {
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/broad/igv/data/GenomeSummaryData.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public class GenomeSummaryData {
public GenomeSummaryData(Genome genome, String[] samples) {
this.genome = genome;
this.samples = samples;
scale = (genome.getNominalLength() / locationUnit) / nPixels;
scale = (genome.getWGLength() / locationUnit) / nPixels;

List<String> chrNames = genome.getLongChromosomeNames();
locationMap = new HashMap<String, IntArrayList>();
Expand All @@ -115,7 +115,7 @@ public GenomeSummaryData(Genome genome, String[] samples) {
void setScale(double scale){
if(nDataPts > 0) throw new IllegalStateException("Can't alter scale after adding data");
this.scale = scale;
nPixels = (int) (((double) this.genome.getNominalLength() / locationUnit) / scale);
nPixels = (int) (((double) this.genome.getWGLength() / locationUnit) / scale);
}


Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/broad/igv/data/seg/FreqData.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public void compute(float ampThreshold, float delThreshold) {
amp.clear();
del.clear();

int sizeInKB = (int) (genome.getNominalLength() / 1000);
int sizeInKB = (int) (genome.getWGLength() / 1000);
int wgBinSize = sizeInKB / 700;
int wgBinCount = sizeInKB / wgBinSize + 1;

Expand Down
7 changes: 0 additions & 7 deletions src/main/java/org/broad/igv/feature/Chromosome.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,6 @@ public void setIndex(int ii) {
this.index = ii;
}

/**
* @return List of cytobands for this chromosome, if any. Can be null.
*/
public List<Cytoband> getCytobands() {
return cytobands;
}


public void setCytobands(List<Cytoband> cytobands) {
this.cytobands = cytobands;
Expand Down
24 changes: 16 additions & 8 deletions src/main/java/org/broad/igv/feature/Cytoband.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
public class Cytoband implements IGVNamedFeature {
String chromosome;
String name;
String longName;
int end;
int start;
char type; // p, n, or c
Expand All @@ -41,6 +40,22 @@ public Cytoband(String chromosome) {
this.name = "";
}

/**
 * Creates a cytoband from its coordinates, name and a "gieStain" descriptor string.
 * <p>
 * The descriptor determines the band type:
 * <ul>
 *   <li>{@code "acen"} sets the type to {@code 'c'};</li>
 *   <li>any other value sets the type to the descriptor's second character;</li>
 *   <li>when that type is {@code 'p'}, the text following the first four characters is
 *       parsed as the stain value, with 100 used when that text is blank.</li>
 * </ul>
 *
 * @param chromosome name of the chromosome this band belongs to
 * @param start      start position of the band
 * @param end        end position of the band
 * @param name       name of the band
 * @param gieStain   stain descriptor; must not be null (presumably a UCSC cytoband
 *                   gieStain value such as "gneg", "gpos50" or "acen" -- confirm with callers)
 */
public Cytoband(String chromosome, int start,int end, String name, String gieStain) {
    this.chromosome = chromosome;
    this.start = start;
    this.end = end;
    this.name = name;

    if (gieStain.equals("acen")) {
        setType('c');
        return;
    }

    // For every other descriptor the second character carries the type code.
    setType(gieStain.charAt(1));
    if (type != 'p') {
        return;
    }

    // Only 'p' bands carry a numeric stain suffix; a missing suffix means 100.
    String stainString = gieStain.substring(4).trim();
    short stain;
    if (stainString.isEmpty()) {
        stain = 100;
    } else {
        stain = Short.parseShort(stainString);
    }
    setStain(stain);
}

public void trim() {

Expand All @@ -64,13 +79,6 @@ public String getName() {
return name;
}

public String getLongName() {
if(longName == null) {
longName = chromosome.replace("chr", "") + name;
}
return longName;
}

/**
 * Sets the end position of this cytoband.
 *
 * @param end the new end position
 */
public void setEnd(int end) {
this.end = end;
}
Expand Down
23 changes: 12 additions & 11 deletions src/main/java/org/broad/igv/feature/FeatureDB.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
//~--- non-JDK imports --------------------------------------------------------

import com.jidesoft.utils.SortedList;
import htsjdk.tribble.NamedFeature;
import org.broad.igv.logging.*;
import org.broad.igv.Globals;
import org.broad.igv.feature.aa.AminoAcidManager;
Expand All @@ -54,10 +55,10 @@ public class FeatureDB {
* Map for all features other than genes.
*/
//private static Map<String, IGVNamedFeature> featureMap = new HashMap(10000);
private static Map<String, List<IGVNamedFeature>> featureMap = Collections.synchronizedSortedMap(new TreeMap<String, List<IGVNamedFeature>>());
private static Map<String, List<NamedFeature>> featureMap = Collections.synchronizedSortedMap(new TreeMap<String, List<NamedFeature>>());
private static final int MAX_DUPLICATE_COUNT = 20;

public static void addFeature(IGVNamedFeature feature, Genome genome) {
public static void addFeature(NamedFeature feature, Genome genome) {

final String name = feature.getName();
if (name != null && name.length() > 0 && !name.equals(".")) {
Expand Down Expand Up @@ -134,7 +135,7 @@ private static void removeByAttributes(IGVFeature igvFeature, Genome genome) {
* @param genome The genome which these features belong to. Used for checking chromosomes
* @return true if successfully added, false if not
*/
static boolean put(String name, IGVNamedFeature feature, Genome genome) {
static boolean put(String name, NamedFeature feature, Genome genome) {
String key = name.toUpperCase();
if (!Globals.isHeadless()) {
Genome currentGenome = genome != null ? genome : GenomeManager.getInstance().getCurrentGenome();
Expand All @@ -144,9 +145,9 @@ static boolean put(String name, IGVNamedFeature feature, Genome genome) {
}

synchronized (featureMap) {
List<IGVNamedFeature> currentList = featureMap.get(key);
List<NamedFeature> currentList = featureMap.get(key);
if (currentList == null) {
List<IGVNamedFeature> newList = new SortedList<IGVNamedFeature>(
List<NamedFeature> newList = new SortedList<NamedFeature>(
new ArrayList<>(), FeatureComparator.get(true));
boolean added = newList.add(feature);
if (added) {
Expand Down Expand Up @@ -228,9 +229,9 @@ static int size() {
/**
* Return a feature with the given name.
*/
public static IGVNamedFeature getFeature(String name) {
public static NamedFeature getFeature(String name) {
String nm = name.trim().toUpperCase();
List<IGVNamedFeature> features = featureMap.get(nm);
List<NamedFeature> features = featureMap.get(nm);

if (features != null) {
return features.get(0);
Expand Down Expand Up @@ -328,11 +329,11 @@ public static Map<Integer, BasicFeature> getMutationAA(String name, int proteinP
}

Map<Integer, BasicFeature> results = new HashMap<Integer, BasicFeature>();
List<IGVNamedFeature> possibles = featureMap.get(nm);
List<NamedFeature> possibles = featureMap.get(nm);

if (possibles != null) {
synchronized (featureMap) {
for (IGVNamedFeature f : possibles) {
for (NamedFeature f : possibles) {
if (!(f instanceof BasicFeature)) {
continue;
}
Expand Down Expand Up @@ -381,13 +382,13 @@ public static Map<Integer, BasicFeature> getMutationNT(String name, int startPos
}

Map<Integer, BasicFeature> results = new HashMap<Integer, BasicFeature>();
List<IGVNamedFeature> possibles = featureMap.get(nm);
List<NamedFeature> possibles = featureMap.get(nm);
String tempNT;
String brefNT = refNT.toUpperCase();

if (possibles != null) {
synchronized (featureMap) {
for (IGVNamedFeature f : possibles) {
for (NamedFeature f : possibles) {
if (!(f instanceof BasicFeature)) {
continue;
}
Expand Down
39 changes: 39 additions & 0 deletions src/main/java/org/broad/igv/feature/genome/ChromAlias.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package org.broad.igv.feature.genome;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

/**
 * The set of alternative names (aliases) known for a single chromosome.
 * <p>
 * Aliases are stored per "name set": each name set maps to the name this chromosome
 * has in that naming scheme. The chromosome's own name is always registered under the
 * {@code "chr"} name set at construction time.
 * <p>
 * Instances are backed by a plain {@link HashMap} and perform no synchronization.
 */
public class ChromAlias {

    /** Name set under which the chromosome's own name is registered. */
    private static final String CHR_NAME_SET = "chr";

    private final String chr;
    private final Map<String, String> aliases = new HashMap<>();

    /**
     * Creates an alias record for the given chromosome and registers the chromosome
     * name itself under the {@code "chr"} name set.
     *
     * @param chr the chromosome name
     */
    public ChromAlias(String chr) {
        this.chr = chr;
        this.aliases.put(CHR_NAME_SET, chr);
    }

    /**
     * @return the chromosome name this record was created with
     */
    public String getChr() {
        return chr;
    }

    /**
     * Registers (or replaces) the alias of this chromosome in the given name set.
     *
     * @param nameSet the name set (naming scheme) identifier
     * @param alias   the chromosome's name within that name set
     */
    public void put(String nameSet, String alias) {
        aliases.put(nameSet, alias);
    }

    /**
     * Looks up the alias of this chromosome in the given name set.
     *
     * @param nameSet the name set identifier
     * @return the alias, or {@code null} if none is registered for {@code nameSet}
     */
    public String get(String nameSet) {
        return aliases.get(nameSet);
    }

    /**
     * @param nameSet the name set identifier
     * @return {@code true} if an alias is registered for {@code nameSet}
     */
    public boolean containsKey(String nameSet) {
        return aliases.containsKey(nameSet);
    }

    /**
     * Returns all registered aliases, including the chromosome name itself.
     * <p>
     * The returned collection is a live view of the backing map: later calls to
     * {@link #put(String, String)} are reflected in it.
     *
     * @return the alias values of every name set
     */
    public Collection<String> values() {
        return aliases.values();
    }
}
Loading

0 comments on commit bcdd371

Please sign in to comment.