Skip to content

Commit

Permalink
Always use datetime.now(timezone.utc) in paasta status (#3963)
Browse files Browse the repository at this point in the history
This will make it clear that we're always using UTC internally and
avoids the footgun of using utcnow()

I've verified that the output for warming up pods is still the same in
both this version of the code as well as in status quo - as well as the
same for flink and kafka workloads.

There is a small change to the kafka output where now the broker
times include the offset, but this is probably better than the 
previous output where it was unclear what offset the times were in
without looking at the parenthetical humanized time
  • Loading branch information
nemacysts authored Sep 20, 2024
1 parent 6763397 commit 6c78e90
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 23 deletions.
40 changes: 24 additions & 16 deletions paasta_tools/cli/cmds/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,8 +719,11 @@ def should_job_info_be_shown(cluster_state):


def get_pod_uptime(pod_deployed_timestamp: str):
pod_creation_time = datetime.strptime(pod_deployed_timestamp, "%Y-%m-%dT%H:%M:%SZ")
pod_uptime = datetime.utcnow() - pod_creation_time
# NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
pod_creation_time = datetime.strptime(
pod_deployed_timestamp, "%Y-%m-%dT%H:%M:%SZ"
).replace(tzinfo=timezone.utc)
pod_uptime = datetime.now(timezone.utc) - pod_creation_time
pod_uptime_total_seconds = pod_uptime.total_seconds()
pod_uptime_days = divmod(pod_uptime_total_seconds, 86400)
pod_uptime_hours = divmod(pod_uptime_days[1], 3600)
Expand All @@ -730,7 +733,7 @@ def get_pod_uptime(pod_deployed_timestamp: str):


def append_pod_status(pod_status, output: List[str]):
output.append(f" Pods:")
output.append(" Pods:")
rows: List[Union[str, Tuple[str, str, str, str]]] = [
("Pod Name", "Host", "Phase", "Uptime")
]
Expand Down Expand Up @@ -844,7 +847,7 @@ def _print_flink_status_from_job_manager(
# So that paasta status -v and kubectl get pods show the same consistent result.
if verbose and len(status["pod_status"]) > 0:
append_pod_status(status["pod_status"], output)
output.append(f" No other information available in non-running state")
output.append(" No other information available in non-running state")
return 0

if status["state"] == "running":
Expand All @@ -854,7 +857,7 @@ def _print_flink_status_from_job_manager(
service=service, instance=instance, client=client
)
except Exception as e:
output.append(PaastaColors.red(f"Exception when talking to the API:"))
output.append(PaastaColors.red("Exception when talking to the API:"))
output.append(str(e))
return 1

Expand All @@ -879,7 +882,7 @@ def _print_flink_status_from_job_manager(
service=service, instance=instance, client=client
)
except Exception as e:
output.append(PaastaColors.red(f"Exception when talking to the API:"))
output.append(PaastaColors.red("Exception when talking to the API:"))
output.append(str(e))
return 1

Expand All @@ -890,7 +893,7 @@ def _print_flink_status_from_job_manager(
try:
jobs = a_sync.block(get_flink_job_details, service, instance, job_ids, client)
except Exception as e:
output.append(PaastaColors.red(f"Exception when talking to the API:"))
output.append(PaastaColors.red("Exception when talking to the API:"))
output.append(str(e))
return 1

Expand All @@ -906,7 +909,7 @@ def _print_flink_status_from_job_manager(
max(10, shutil.get_terminal_size().columns - 52), max_job_name_length
)

output.append(f" Jobs:")
output.append(" Jobs:")
if verbose > 1:
output.append(
f' {"Job Name": <{allowed_max_job_name_length}} State Job ID Started'
Expand Down Expand Up @@ -1313,10 +1316,10 @@ def get_replica_state(pod: KubernetesPodV2) -> ReplicaState:
# This logic likely needs refining
main_container = get_main_container(pod)
if main_container:
# K8s API is returning timestamps in YST, so we use now() instead of utcnow()
# NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
warming_up = (
pod.create_timestamp + main_container.healthcheck_grace_period
> datetime.now().timestamp()
> datetime.now(timezone.utc).timestamp()
)
if pod.mesh_ready is False:
if main_container.state != "running":
Expand Down Expand Up @@ -1418,14 +1421,17 @@ def create_replica_table(
)
if state == ReplicaState.WARMING_UP:
if verbose > 0:
warmup_duration = datetime.now().timestamp() - pod.create_timestamp
# NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
warmup_duration = (
datetime.now(timezone.utc).timestamp() - pod.create_timestamp
)
humanized_duration = humanize.naturaldelta(
timedelta(seconds=warmup_duration)
)
grace_period_remaining = (
pod.create_timestamp
+ main_container.healthcheck_grace_period
- datetime.now().timestamp()
- datetime.now(timezone.utc).timestamp()
)
humanized_remaining = humanize.naturaldelta(
timedelta(seconds=grace_period_remaining)
Expand Down Expand Up @@ -1790,6 +1796,7 @@ def node_property_to_str(prop: Dict[str, Any], verbose: int) -> str:
parsed_time = datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ").replace(
tzinfo=timezone.utc
)
# NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
now = datetime.now(timezone.utc)
return (
humanize.naturaldelta(
Expand Down Expand Up @@ -1825,7 +1832,7 @@ def print_kafka_status(
desired_state = annotations.get(paasta_prefixed("desired_state"))
if desired_state is None:
raise ValueError(
f"expected desired state in kafka annotation, but received none"
"expected desired state in kafka annotation, but received none"
)
output.append(f" State: {desired_state}")

Expand All @@ -1851,7 +1858,7 @@ def print_kafka_status(
)

brokers = status["brokers"]
output.append(f" Brokers:")
output.append(" Brokers:")

if verbose:
headers = ["Id", "Phase", "IP", "Pod Name", "Started"]
Expand All @@ -1864,10 +1871,11 @@ def print_kafka_status(
PaastaColors.green if broker["phase"] == "Running" else PaastaColors.red
)

# NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
start_time = datetime.strptime(
broker["deployed_timestamp"], "%Y-%m-%dT%H:%M:%SZ"
)
delta = datetime.utcnow() - start_time
).replace(tzinfo=timezone.utc)
delta = datetime.now(timezone.utc) - start_time
formatted_start_time = f"{str(start_time)} ({humanize.naturaltime(delta)})"

if verbose:
Expand Down
6 changes: 4 additions & 2 deletions paasta_tools/kubernetes_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import os
import re
from datetime import datetime
from datetime import timezone
from enum import Enum
from functools import lru_cache
from inspect import currentframe
Expand Down Expand Up @@ -2939,7 +2940,7 @@ def recent_container_restart(
last_timestamp: Optional[int],
time_window_s: int = 900, # 15 mins
) -> bool:
min_timestamp = datetime.now().timestamp() - time_window_s
min_timestamp = datetime.now(timezone.utc).timestamp() - time_window_s
return (
restart_count > 0
and last_state == "terminated"
Expand Down Expand Up @@ -3661,7 +3662,8 @@ async def get_events_for_object(
)
events = events.items if events else []
if max_age_in_seconds and max_age_in_seconds > 0:
min_timestamp = datetime.now().timestamp() - max_age_in_seconds
# NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
min_timestamp = datetime.now(timezone.utc).timestamp() - max_age_in_seconds
events = [
evt
for evt in events
Expand Down
10 changes: 5 additions & 5 deletions tests/cli/test_cmds_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -2411,16 +2411,16 @@ def test_output(
expected_output = [
f" Kafka View Url: {status['kafka_view_url']}",
f" Zookeeper: {status['zookeeper']}",
f" State: testing",
" State: testing",
f" Ready: {str(status['cluster_ready']).lower()}",
f" Health: {PaastaColors.red('unhealthy')}",
f" Reason: {status['health']['message']}",
f" Offline Partitions: {status['health']['offline_partitions']}",
f" Under Replicated Partitions: {status['health']['under_replicated_partitions']}",
f" Brokers:",
f" Id Phase Started",
f" 0 {PaastaColors.green('Running')} 2020-03-25 16:24:21 ({mock_naturaltime.return_value})",
f" 1 {PaastaColors.red('Pending')} 2020-03-25 16:24:21 ({mock_naturaltime.return_value})",
" Brokers:",
" Id Phase Started",
f" 0 {PaastaColors.green('Running')} 2020-03-25 16:24:21+00:00 ({mock_naturaltime.return_value})",
f" 1 {PaastaColors.red('Pending')} 2020-03-25 16:24:21+00:00 ({mock_naturaltime.return_value})",
]
assert expected_output == output

Expand Down

0 comments on commit 6c78e90

Please sign in to comment.