Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added possibility to retrieve the latest available image #13

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions sentinel2download/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
# Default per-granule metadata constraints; values look like upper bounds on the
# named percentages (100.0 = no filtering) — presumably checked during download; TODO confirm usage
CONSTRAINTS = MappingProxyType({'CLOUDY_PIXEL_PERCENTAGE': 100.0, 'NODATA_PIXEL_PERCENTAGE': 100.0, })

# NOTE(review): suffix of folder-placeholder objects in the GCS bucket — confirm against bucket layout
FOLDER_SUFFIX = "_$folder$"
# Captures the acquisition date digits from a .SAFE prefix, ex: _20200812T113607_ -> 20200812
DATE_PATTERN = r"_(\d+)T\d+_"


class Sentinel2Downloader:
Expand All @@ -45,14 +46,17 @@ def __init__(self, api_key: str, verbose: bool = False):
self.bucket = self.client.get_bucket('gcp-public-data-sentinel-2')
self.metadata_suffix = 'MTD_TL.xml'

def _filter_by_dates(self, safe_prefixes) -> List[str]:
@staticmethod
def _prefix_to_date(prefix, date_pattern=DATE_PATTERN, date_format='%Y%m%d') -> datetime:
# acquired date: 20200812T113607
date_pattern = r"_(\d+)T\d+_"
search = re.search(date_pattern, prefix)
date = search.group(1)
return datetime.strptime(date, date_format)

def _filter_by_dates(self, safe_prefixes) -> List[str]:
filtered = list()
for safe_prefix in safe_prefixes:
search = re.search(date_pattern, safe_prefix)
date = search.group(1)
date = datetime.strptime(date, '%Y%m%d')
date = self._prefix_to_date(safe_prefix)
if date in self.date_range:
filtered.append(safe_prefix)
return filtered
Expand Down Expand Up @@ -161,9 +165,29 @@ def _get_blobs_to_load(self, prefixes):

return blobs_to_load

@staticmethod
def extract_date(s):
match = re.search(DATE_PATTERN, s)
if match:
return match.group(1)
return ''

def _get_latest_available_date_prefix(self, safe_prefixes) -> List[str]:
prefixes_date_descend = sorted(safe_prefixes, key=lambda s: self.extract_date(s), reverse=True)
if self.full_download:
return prefixes_date_descend[:1]
for prefix in prefixes_date_descend:
blobs_to_load = self._get_blobs_to_load([prefix])
if blobs_to_load:
return [prefix]
return []

def _get_filtered_prefixes(self, tile_prefix) -> List[str]:
# filter store items by base prefix, ex: tiles/36/U/YA/
safe_prefixes = self._get_safe_prefixes(tile_prefix)
if self.latest_date:
# get latest available image .SAFE path
return self._get_latest_available_date_prefix(safe_prefixes)
# filter .SAFE paths by date range
filtered_prefixes = self._filter_by_dates(safe_prefixes)
return filtered_prefixes
Expand Down Expand Up @@ -202,7 +226,7 @@ def _download_blobs_mult(self, blobs) -> List[Tuple[str, str]]:
return results

def _setup(self, product_type, tiles, start_date, end_date, bands,
constraints, output_dir, cores, full_download):
constraints, output_dir, cores, full_download, latest_date):
if product_type not in PRODUCT_TYPE:
raise ValueError(f"Provide proper Sentinel2 type: {PRODUCT_TYPE}")
self.product_type = product_type
Expand Down Expand Up @@ -234,6 +258,7 @@ def _setup(self, product_type, tiles, start_date, end_date, bands,
self.output_dir = output_dir
self.cores = cores
self.full_download = full_download
self.latest_date = latest_date

def download(self,
product_type: str,
Expand All @@ -245,7 +270,8 @@ def download(self,
constraints: dict = CONSTRAINTS,
output_dir: str = './sentinel2imagery',
cores: int = 5,
full_download: bool = False) -> Optional[List]:
full_download: bool = False,
latest_date: bool = False) -> Optional[List]:
"""
:param product_type: str, "L2A" or "L1C" Sentinel2 products
:param tiles: list, tiles to load (ex: {36UYA, 36UYB})
Expand All @@ -258,11 +284,13 @@ def download(self,
:param output_dir: str, path to loading dir, default: './sentinel2imagery'
:param cores: int, number of cores, default: 5
:param full_download: bool, option for full download of Sentinel-2 .SAFE folder, default: False
:param latest_date: bool, option for retrieving last available image, default: False
:return: [tuple, None], tuples (save_path, blob_name), if save_path is None, the blob not loaded
or None if nothing to load
"""

self._setup(product_type, tiles, start_date, end_date, bands, constraints, output_dir, cores, full_download)
self._setup(product_type, tiles, start_date, end_date, bands, constraints, output_dir, cores, full_download,
latest_date)

logger.info("Start downloading...")
start_time = time.time()
Expand Down
30 changes: 30 additions & 0 deletions sentinel2download/overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,33 @@ def _zone_number(lat, lon):
return 32600 + zone
else:
return 32700 + zone

def overlap_with_geometry(self, *, limit: float = 0.001) -> Optional[gp.GeoDataFrame]:
"""
Find unique tiles that intersects given aoi, area.
The same as overlap, but with geometry.
:param limit: float, min intersection area in km2
:return: GeoDataFrame: Tile names (Name column) and it's geometry in epsg:4326
"""

logger.info(f"Start finding overlapping tiles")

grid, epsg = self._intersect(limit)

aoi = self.aoi
overlap_tiles = list()
for row in grid.itertuples():
start_area = aoi.geometry[0].area
aoi.geometry[0] = aoi.geometry[0].difference(row.geometry)
if start_area != aoi.geometry[0].area:
overlap_tiles.append(dict(Name=row.Name, geometry=row.geometry))

if not overlap_tiles:
return

tiles = gp.GeoDataFrame(overlap_tiles, crs=epsg)
tiles = tiles.to_crs(self.crs)

logger.info(f"Found {len(tiles)} tiles: {', '.join(sorted(tiles.Name))}")
return tiles