Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Reduce Pandas deprecation warnings. #263

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/zipline/_protocol.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ cdef class BarData:
df = (pd.concat(df_dict,
keys=df_dict.keys(),
names=['fields', dt_label])
.stack(dropna=False) # ensure we return all fields/assets/dates despite missing values
.stack(future_stack=True) # ensure we return all fields/assets/dates despite missing values
.unstack(level='fields'))
df.index.set_names([dt_label, 'asset'])
return df.sort_index()
Expand Down
2 changes: 1 addition & 1 deletion src/zipline/assets/asset_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def check_intersections(persymbol):
ambiguous[persymbol.name] = intersections, msg_component

mappings.groupby(["symbol", "country_code"], group_keys=False).apply(
check_intersections
check_intersections, include_groups=False
)

if ambiguous:
Expand Down
4 changes: 2 additions & 2 deletions src/zipline/data/bcolz_minute_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -964,8 +964,8 @@ def _minutes_to_exclude(self):
market_closes = self._market_closes.values.astype("datetime64[m]")
minutes_per_day = (market_closes - market_opens).astype(np.int64)
early_indices = np.where(minutes_per_day != self._minutes_per_day - 1)[0]
early_opens = self._market_opens[early_indices]
early_closes = self._market_closes[early_indices]
early_opens = self._market_opens.iloc[early_indices]
early_closes = self._market_closes.iloc[early_indices]
minutes = [
(market_open, early_close)
for market_open, early_close in zip(early_opens, early_closes)
Expand Down
2 changes: 1 addition & 1 deletion src/zipline/data/bundles/quandl.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def gen_asset_metadata(data, show_progress):
if show_progress:
log.info("Generating asset metadata.")

data = data.groupby(by="symbol").agg({"date": [np.min, np.max]})
data = data.groupby(by="symbol").agg({"date": ["min", "max"]})
data.reset_index(inplace=True)
data["start_date"] = data.date[np.min.__name__]
data["end_date"] = data.date[np.max.__name__]
Expand Down
6 changes: 4 additions & 2 deletions src/zipline/data/data_portal.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,9 @@ def handle_extra_source(self, source_df, sim_params):
group_names = grouped_by_sid.groups.keys()
group_dict = {}
for group_name in group_names:
group_dict[group_name] = grouped_by_sid.get_group(group_name)
# FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas.
# Pass `(name,)` instead of `name` to silence this warning.
group_dict[group_name] = grouped_by_sid.get_group((group_name,))

# This will be the dataframe which we query to get fetcher assets at
# any given time. Get's overwritten every time there's a new fetcher
Expand Down Expand Up @@ -948,7 +950,7 @@ def get_history_window(
df.iloc[0, assets_with_leading_nan] = np.array(
initial_values, dtype=np.float64
)
df.fillna(method="ffill", inplace=True)
df.ffill(inplace=True)

# forward-filling will incorrectly produce values after the end of
# an asset's lifetime, so write NaNs back over the asset's
Expand Down
3 changes: 2 additions & 1 deletion src/zipline/finance/ledger.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,8 @@ def end_of_bar(self, session_ix):
# make daily_returns hold the partial returns, this saves many
# metrics from doing a concat and copying all of the previous
# returns
self.daily_returns_array.iloc[session_ix] = self.todays_returns
# AttributeError: 'numpy.ndarray' object has no attribute 'iloc'
self.daily_returns_array[session_ix] = self.todays_returns

def end_of_session(self, session_ix):
# save the daily returns time-series
Expand Down
3 changes: 2 additions & 1 deletion src/zipline/finance/metrics/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,8 @@ def risk_report(cls, algorithm_returns, benchmark_returns, algorithm_leverages):
start=start_session,
# Ensure we have at least one month
end=end - datetime.timedelta(days=1),
freq="M",
# FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.
freq="ME",
tz="utc",
)

Expand Down
2 changes: 1 addition & 1 deletion src/zipline/finance/slippage.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def _get_window_data(self, data, asset, window_length):
# always just NaN.
close_volatility = (
close_history[:-1]
.pct_change()[1:]
.pct_change(fill_method=None)[1:]
.std(
skipna=False,
)
Expand Down
2 changes: 1 addition & 1 deletion src/zipline/pipeline/loaders/earnings_estimates.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ def get_last_data_per_qtr(
ffill_across_cols(last_per_qtr, columns, self.name_map)
# Stack quarter and sid into the index.
stacked_last_per_qtr = last_per_qtr.stack(
[SID_FIELD_NAME, NORMALIZED_QUARTERS],
[SID_FIELD_NAME, NORMALIZED_QUARTERS], future_stack=True
)
# Set date index name for ease of reference
stacked_last_per_qtr.index.set_names(
Expand Down
2 changes: 1 addition & 1 deletion src/zipline/sources/benchmark_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def _initialize_precalculated_series(
first_day_return = (first_close - first_open) / first_open

returns = benchmark_series.pct_change()[:]
returns[0] = first_day_return
returns.iloc[0] = first_day_return
return returns, returns
else:
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion src/zipline/sources/requests_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def __iter__(self):
# the dt column is dropped. So, we need to manually copy
# dt into the event.
event.dt = dt
for k, v in series.iteritems():
for k, v in series.items():
# convert numpy integer types to
# int. This assumes we are on a 64bit
# platform that will not lose information
Expand Down
14 changes: 7 additions & 7 deletions src/zipline/testing/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,11 +525,11 @@ def create_daily_df_for_asset(trading_calendar, start_day, end_day, interval=1):
# only keep every 'interval' rows
for idx, _ in enumerate(days_arr):
if (idx + 1) % interval != 0:
df["open"].iloc[idx] = 0
df["high"].iloc[idx] = 0
df["low"].iloc[idx] = 0
df["close"].iloc[idx] = 0
df["volume"].iloc[idx] = 0
df.loc[idx, "open"] = 0
df.loc[idx, "high"] = 0
df.loc[idx, "low"] = 0
df.loc[idx, "close"] = 0
df.loc[idx, "volume"] = 0

return df

Expand Down Expand Up @@ -1287,13 +1287,13 @@ def patch_os_environment(remove=None, **values):
remove = remove or []
for key in remove:
old_values[key] = os.environ.pop(key)
for key, value in values.iteritems():
for key, value in values.items():
old_values[key] = os.getenv(key)
os.environ[key] = value
try:
yield
finally:
for old_key, old_value in old_values.iteritems():
for old_key, old_value in old_values.items():
if old_value is None:
# Value was not present when we entered, so del it out if it's
# still present.
Expand Down
2 changes: 1 addition & 1 deletion src/zipline/utils/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ def execution_period_values(self):
# Group by ISO year (0) and week (1)
.groupby(sessions.map(lambda x: x.isocalendar()[0:2]))
.nth(self.td_delta)
.view(np.int64)
.astype(np.int64)
)


Expand Down
6 changes: 3 additions & 3 deletions tests/data/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@ def test_load_raw_arrays(self):
# The value is the autogenerated value from test fixtures.
assert_almost_equal(
10.0,
opens[1][first_minute_loc],
opens.iloc[first_minute_loc][1],
err_msg="The value for Equity 1, should be 10.0, at NYSE open.",
)

Expand Down Expand Up @@ -850,7 +850,7 @@ def test_load_raw_arrays(self):

assert_almost_equal(
nan,
opens[1][tday_loc],
opens.iloc[tday_loc][1],
err_msg="2015-11-26 should be `nan`, since Thanksgiving is a "
"holiday in the reader's calendar.",
)
Expand All @@ -861,7 +861,7 @@ def test_load_raw_arrays(self):

assert_almost_equal(
nan,
opens[1][tday_loc],
opens.iloc[tday_loc][1],
err_msg="2015-11-26 should be `nan`, since Thanksgiving is a "
"holiday in the reader's calendar.",
)
Expand Down
33 changes: 17 additions & 16 deletions tests/finance/test_commissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,8 +441,8 @@ def test_futures_per_trade(self):
# The capital used is only -1.0 (the commission cost) because no
# capital is actually spent to enter into a long position on a futures
# contract.
assert results.orders[1][0]["commission"] == 1.0
assert results.capital_used[1] == -1.0
assert results.orders.iloc[1][0]["commission"] == 1.0
assert results.capital_used.iloc[1] == -1.0

def test_per_share_no_minimum(self):
results = self.get_results(
Expand Down Expand Up @@ -487,9 +487,9 @@ def test_per_share_with_minimum(self):
)

# commissions should be 8, 10, 15
assert 8 == results.orders[1][0]["commission"]
assert 10 == results.orders[2][0]["commission"]
assert 15 == results.orders[3][0]["commission"]
assert 8 == results.orders.iloc[1][0]["commission"]
assert 10 == results.orders.iloc[2][0]["commission"]
assert 15 == results.orders.iloc[3][0]["commission"]

self.verify_capital_used(results, [-1008, -1002, -1005])

Expand All @@ -503,9 +503,9 @@ def test_per_share_with_minimum(self):
)

# commissions should be 12, 12, 15
assert 12 == results.orders[1][0]["commission"]
assert 12 == results.orders[2][0]["commission"]
assert 15 == results.orders[3][0]["commission"]
assert 12 == results.orders.iloc[1][0]["commission"]
assert 12 == results.orders.iloc[2][0]["commission"]
assert 15 == results.orders.iloc[3][0]["commission"]

self.verify_capital_used(results, [-1012, -1000, -1003])

Expand All @@ -519,9 +519,9 @@ def test_per_share_with_minimum(self):
)

# commissions should be 18, 18, 18
assert 18 == results.orders[1][0]["commission"]
assert 18 == results.orders[2][0]["commission"]
assert 18 == results.orders[3][0]["commission"]
assert 18 == results.orders.iloc[1][0]["commission"]
assert 18 == results.orders.iloc[2][0]["commission"]
assert 18 == results.orders.iloc[3][0]["commission"]

self.verify_capital_used(results, [-1018, -1000, -1000])

Expand Down Expand Up @@ -549,8 +549,8 @@ def test_per_contract(self, min_trade_cost, expected_commission):
),
)

assert results.orders[1][0]["commission"] == expected_commission
assert results.capital_used[1] == -expected_commission
assert results.orders.iloc[1][0]["commission"] == expected_commission
assert results.capital_used.iloc[1] == -expected_commission

def test_per_dollar(self):
results = self.get_results(
Expand Down Expand Up @@ -583,6 +583,7 @@ def test_incorrectly_set_futures_model(self):
)

def verify_capital_used(self, results, values):
assert values[0] == results.capital_used[1]
assert values[1] == results.capital_used[2]
assert values[2] == results.capital_used[3]
# assert values[0] == results.capital_used.iloc[1]
# assert values[1] == results.capital_used.iloc[2]
# assert values[2] == results.capital_used.iloc[3]
assert values == results.capital_used[1:4].tolist()
26 changes: 13 additions & 13 deletions tests/metrics/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def test_nop(self):
)

nan_then_zero = pd.Series(0.0, index=self.closes)
nan_then_zero[0] = float("nan")
nan_then_zero.iloc[0] = float("nan")
nan_then_zero_fields = (
"algo_volatility",
"benchmark_volatility",
Expand Down Expand Up @@ -667,7 +667,7 @@ def handle_data(context, data):
)

nan_then_zero = pd.Series(0.0, index=self.closes)
nan_then_zero[0] = float("nan")
nan_then_zero.iloc[0] = float("nan")
nan_then_zero_fields = (
"algo_volatility",
"benchmark_volatility",
Expand Down Expand Up @@ -727,7 +727,7 @@ def handle_data(context, data):
check_names=False,
)

expected_cash[0] += cash_modifier
expected_cash.iloc[0] += cash_modifier
assert_equal(
perf["ending_cash"],
expected_cash,
Expand All @@ -736,7 +736,7 @@ def handle_data(context, data):

# we purchased one share on the first day
expected_capital_used = pd.Series(0.0, index=self.closes)
expected_capital_used[0] += cash_modifier
expected_capital_used.iloc[0] += cash_modifier

assert_equal(
perf["capital_used"],
Expand All @@ -760,7 +760,7 @@ def handle_data(context, data):

# we don't start with any positions; the first day has no starting
# exposure
expected_position_exposure[0] = 0
expected_position_exposure.iloc[0] = 0
for field in "starting_value", "starting_exposure":
# for equities, position value and position exposure are the same
assert_equal(
Expand Down Expand Up @@ -883,8 +883,8 @@ def handle_data(context, data):
cash_modifier,
index=self.trading_minutes,
)
expected_portfolio_capital_used[0] = 0.0
expected_capital_used[0] = 0
expected_portfolio_capital_used.iloc[0] = 0.0
expected_capital_used.iloc[0] = 0
assert_equal(
portfolio_snapshots["cash_flow"],
expected_portfolio_capital_used,
Expand Down Expand Up @@ -1670,15 +1670,15 @@ def handle_data(context, data):
# we sold one share on the first day
cash_modifier = +expected_fill_price

expected_cash[1:] += cash_modifier
expected_cash.iloc[1:] += cash_modifier

assert_equal(
perf["starting_cash"],
expected_cash,
check_names=False,
)

expected_cash[0] += cash_modifier
expected_cash.iloc[0] += cash_modifier
assert_equal(
perf["ending_cash"],
expected_cash,
Expand All @@ -1687,7 +1687,7 @@ def handle_data(context, data):

# we purchased one share on the first day
expected_capital_used = pd.Series(0.0, index=self.equity_closes)
expected_capital_used[0] += cash_modifier
expected_capital_used.iloc[0] += cash_modifier

assert_equal(
perf["capital_used"],
Expand All @@ -1707,7 +1707,7 @@ def handle_data(context, data):
# we don't start with any positions; the first day has no starting
# exposure
expected_starting_exposure = expected_exposure.shift(1)
expected_starting_exposure[0] = 0.0
expected_starting_exposure.iloc[0] = 0.0
for field in "starting_value", "starting_exposure":
# for equities, position value and position exposure are the same
assert_equal(
Expand Down Expand Up @@ -1815,8 +1815,8 @@ def handle_data(context, data):
cash_modifier,
index=self.equity_minutes,
)
expected_portfolio_capital_used[0] = 0.0
expected_capital_used[0] = 0
expected_portfolio_capital_used.iloc[0] = 0.0
expected_capital_used.iloc[0] = 0
assert_equal(
portfolio_snapshots["cash_flow"],
expected_portfolio_capital_used,
Expand Down
3 changes: 2 additions & 1 deletion tests/pipeline/test_downsampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,8 @@ def check_downsampled_term(self, term):

expected_results = {
"year": (
raw_term_results.groupby(pd.Grouper(freq="AS"))
# FutureWarning: 'AS' is deprecated and will be removed in a future version, please use 'YS' instead.
raw_term_results.groupby(pd.Grouper(freq="YS"))
.first()
.reindex(compute_dates, method="ffill")
),
Expand Down
2 changes: 1 addition & 1 deletion tests/pipeline/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def test_same_day_pipeline(self):
# (i.e. start and end dates are the same) we should accurately get
# data for the day prior.
result = self.engine.run_pipeline(p, self.dates[1], self.dates[1])
assert result["f"][0] == 1.0
assert result["f"].iloc[0] == 1.0

def test_screen(self):
asset_ids = np.array(self.asset_ids)
Expand Down
4 changes: 2 additions & 2 deletions tests/pipeline/test_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def check_previous_value_results(self, column, results, dates):
# from pandas won't be tz_localized.
dates = dates.tz_localize(None)

for asset, asset_result in results.iteritems():
for asset, asset_result in results.items():
relevant_events = events[events.sid == asset.sid]
assert len(relevant_events) == 2

Expand Down Expand Up @@ -520,7 +520,7 @@ def check_next_value_results(self, column, results, dates):
# Remove timezone info from trading days, since the outputs
# from pandas won't be tz_localized.
dates = dates.tz_localize(None)
for asset, asset_result in results.iteritems():
for asset, asset_result in results.items():
relevant_events = events[events.sid == asset.sid]
assert len(relevant_events) == 2

Expand Down
Loading
Loading