612 lines
22 KiB
Python
612 lines
22 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# yfinance - market data downloader
|
|
# https://github.com/ranaroussi/yfinance
|
|
#
|
|
# Copyright 2017-2019 Ran Aroussi
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
from __future__ import print_function
|
|
|
|
from io import StringIO
|
|
import json as _json
|
|
import warnings
|
|
from typing import Optional, Union
|
|
from urllib.parse import quote as urlencode
|
|
|
|
import pandas as pd
|
|
import requests
|
|
|
|
from . import utils, cache
|
|
from .data import YfData
|
|
from .exceptions import YFEarningsDateMissing
|
|
from .scrapers.analysis import Analysis
|
|
from .scrapers.fundamentals import Fundamentals
|
|
from .scrapers.holders import Holders
|
|
from .scrapers.quote import Quote, FastInfo
|
|
from .scrapers.history import PriceHistory
|
|
|
|
from .const import _BASE_URL_, _ROOT_URL_
|
|
|
|
|
|
class TickerBase:
|
|
def __init__(self, ticker, session=None, proxy=None):
    """
    Set up per-ticker state and the scraper objects that share one YfData.

    :param ticker: symbol (upper-cased on entry); an ISIN is also accepted
        and is translated to a ticker via ``utils.get_ticker_by_isin``.
    :param session: forwarded to ``YfData`` and to the ISIN lookup.
    :param proxy: default proxy used by methods when none is passed.
    """
    symbol = ticker.upper()
    # accept isin as ticker
    if utils.is_isin(symbol):
        # NOTE(review): the second argument is passed as None rather than
        # `proxy` - confirm against utils.get_ticker_by_isin whether the
        # constructor proxy should be forwarded here.
        symbol = utils.get_ticker_by_isin(symbol, None, session)

    self.ticker = symbol
    self.proxy = proxy
    self.session = session

    # Lazily populated caches
    self._tz = None
    self._isin = None
    self._news = []
    self._shares = None
    self._earnings_dates = {}
    self._earnings = None
    self._financials = None
    self._fast_info = None
    self._price_history = None  # lazy-load

    # Shared data layer and the scrapers built on top of it
    self._data: YfData = YfData(session=session)
    self._analysis = Analysis(self._data, self.ticker)
    self._holders = Holders(self._data, self.ticker)
    self._quote = Quote(self._data, self.ticker)
    self._fundamentals = Fundamentals(self._data, self.ticker)
|
|
|
|
@utils.log_indent_decorator
def history(self, *args, **kwargs) -> pd.DataFrame:
    """Forward all arguments to the lazily created PriceHistory.history()."""
    price_history = self._lazy_load_price_history()
    return price_history.history(*args, **kwargs)
|
|
|
|
# ------------------------
|
|
|
|
def _lazy_load_price_history(self):
|
|
if self._price_history is None:
|
|
self._price_history = PriceHistory(self._data, self.ticker, self._get_ticker_tz(self.proxy, timeout=10))
|
|
return self._price_history
|
|
|
|
def _get_ticker_tz(self, proxy, timeout):
|
|
proxy = proxy or self.proxy
|
|
if self._tz is not None:
|
|
return self._tz
|
|
c = cache.get_tz_cache()
|
|
tz = c.lookup(self.ticker)
|
|
|
|
if tz and not utils.is_valid_timezone(tz):
|
|
# Clear from cache and force re-fetch
|
|
c.store(self.ticker, None)
|
|
tz = None
|
|
|
|
if tz is None:
|
|
tz = self._fetch_ticker_tz(proxy, timeout)
|
|
|
|
if utils.is_valid_timezone(tz):
|
|
# info fetch is relatively slow so cache timezone
|
|
c.store(self.ticker, tz)
|
|
else:
|
|
tz = None
|
|
|
|
self._tz = tz
|
|
return tz
|
|
|
|
@utils.log_indent_decorator
def _fetch_ticker_tz(self, proxy, timeout):
    """
    Query Yahoo's chart endpoint for one day of data just to read the
    exchange timezone name out of the response metadata.

    Returns the timezone name, or None on request failure, an API error
    payload, or an unexpected response layout (each case is logged).
    """
    proxy = proxy or self.proxy
    logger = utils.get_yf_logger()

    # Getting data from json
    url = f"{_BASE_URL_}/v8/finance/chart/{self.ticker}"
    params = {"range": "1d", "interval": "1d"}

    try:
        response = self._data.cache_get(url=url, params=params, proxy=proxy, timeout=timeout)
        data = response.json()
    except Exception as e:
        logger.error(f"Failed to get ticker '{self.ticker}' reason: {e}")
        return None

    error = data.get('chart', {}).get('error', None)
    if error:
        # explicit error from yahoo API
        logger.debug(f"Got error from yahoo api for ticker {self.ticker}, Error: {error}")
        return None

    try:
        return data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
    except Exception as err:
        logger.error(f"Could not get exchangeTimezoneName for ticker '{self.ticker}' reason: {err}")
        logger.debug("Got response: ")
        logger.debug("-------------")
        logger.debug(f" {data}")
        logger.debug("-------------")
    return None
|
|
|
|
def get_recommendations(self, proxy=None, as_dict=False):
    """
    Returns a DataFrame with the recommendations
    Columns: period  strongBuy  buy  hold  sell  strongSell

    With ``as_dict=True`` the result of ``to_dict()`` is returned instead.
    """
    quote = self._quote
    quote.proxy = proxy or self.proxy
    recommendations = quote.recommendations
    return recommendations.to_dict() if as_dict else recommendations
|
|
|
|
def get_recommendations_summary(self, proxy=None, as_dict=False):
    """Alias of get_recommendations(); arguments are forwarded unchanged."""
    summary = self.get_recommendations(proxy=proxy, as_dict=as_dict)
    return summary
|
|
|
|
def get_upgrades_downgrades(self, proxy=None, as_dict=False):
    """
    Returns a DataFrame with the recommendations changes (upgrades/downgrades)
    Index: date of grade
    Columns: firm toGrade fromGrade action

    With ``as_dict=True`` the result of ``to_dict()`` is returned instead.
    """
    quote = self._quote
    quote.proxy = proxy or self.proxy
    changes = quote.upgrades_downgrades
    return changes.to_dict() if as_dict else changes
|
|
|
|
def get_calendar(self, proxy=None) -> dict:
    """Return the quote scraper's ``calendar`` after setting its proxy."""
    quote = self._quote
    quote.proxy = proxy or self.proxy
    return quote.calendar
|
|
|
|
def get_major_holders(self, proxy=None, as_dict=False):
    """
    Return the ``major`` table from the holders scraper.

    :param proxy: proxy for this call; falls back to the instance proxy.
    :param as_dict: return ``to_dict()`` of the table instead of the table.
    :return: the table, its dict form, or None when the scraper has no data
        (same contract as the other holders getters on this class).
    """
    self._holders.proxy = proxy or self.proxy
    data = self._holders.major
    # Guard against a missing table so as_dict=True does not raise
    # AttributeError on None; the sibling holders getters do the same.
    if data is None:
        return None
    if as_dict:
        return data.to_dict()
    return data
|
|
|
|
def get_institutional_holders(self, proxy=None, as_dict=False):
    """
    Return the ``institutional`` table from the holders scraper, as a dict
    when ``as_dict`` is true, or None when the scraper yields None.
    """
    holders = self._holders
    holders.proxy = proxy or self.proxy
    table = holders.institutional
    if table is None:
        return None
    return table.to_dict() if as_dict else table
|
|
|
|
def get_mutualfund_holders(self, proxy=None, as_dict=False):
    """
    Return the ``mutualfund`` table from the holders scraper, as a dict
    when ``as_dict`` is true, or None when the scraper yields None.
    """
    holders = self._holders
    holders.proxy = proxy or self.proxy
    table = holders.mutualfund
    if table is None:
        return None
    return table.to_dict() if as_dict else table
|
|
|
|
def get_insider_purchases(self, proxy=None, as_dict=False):
    """
    Return the ``insider_purchases`` table from the holders scraper, as a
    dict when ``as_dict`` is true, or None when the scraper yields None.
    """
    holders = self._holders
    holders.proxy = proxy or self.proxy
    table = holders.insider_purchases
    if table is None:
        return None
    return table.to_dict() if as_dict else table
|
|
|
|
def get_insider_transactions(self, proxy=None, as_dict=False):
    """
    Return the ``insider_transactions`` table from the holders scraper, as a
    dict when ``as_dict`` is true, or None when the scraper yields None.
    """
    holders = self._holders
    holders.proxy = proxy or self.proxy
    table = holders.insider_transactions
    if table is None:
        return None
    return table.to_dict() if as_dict else table
|
|
|
|
def get_insider_roster_holders(self, proxy=None, as_dict=False):
    """
    Return the ``insider_roster`` table from the holders scraper, as a dict
    when ``as_dict`` is true, or None when the scraper yields None.
    """
    holders = self._holders
    holders.proxy = proxy or self.proxy
    table = holders.insider_roster
    if table is None:
        return None
    return table.to_dict() if as_dict else table
|
|
|
|
def get_info(self, proxy=None) -> dict:
    """Return the quote scraper's ``info`` after setting its proxy."""
    quote = self._quote
    quote.proxy = proxy or self.proxy
    return quote.info
|
|
|
|
def get_fast_info(self, proxy=None):
    """Return the FastInfo object for this ticker, creating it on first call."""
    cached = self._fast_info
    if cached is None:
        # `proxy` only takes effect on the call that builds the object.
        cached = self._fast_info = FastInfo(self, proxy=proxy)
    return cached
|
|
|
|
@property
def basic_info(self):
    """
    Deprecated alias for the fast-info object.

    Emits a DeprecationWarning and returns ``self.get_fast_info()``.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than
    # to this property.
    warnings.warn(
        "'Ticker.basic_info' is renamed to 'Ticker.fast_info', hopefully purpose is clearer",
        DeprecationWarning,
        stacklevel=2,
    )
    # This class defines get_fast_info() but no `fast_info` attribute, so go
    # through the getter to avoid an AttributeError on the base class.
    return self.get_fast_info()
|
|
|
|
def get_sustainability(self, proxy=None, as_dict=False):
    """
    Return the quote scraper's ``sustainability`` data, converted with
    ``to_dict()`` when ``as_dict`` is true.
    """
    quote = self._quote
    quote.proxy = proxy or self.proxy
    scores = quote.sustainability
    return scores.to_dict() if as_dict else scores
|
|
|
|
def get_analyst_price_target(self, proxy=None, as_dict=False):
    """
    Return the analysis scraper's ``analyst_price_target`` data, converted
    with ``to_dict()`` when ``as_dict`` is true.
    """
    analysis = self._analysis
    analysis.proxy = proxy or self.proxy
    target = analysis.analyst_price_target
    return target.to_dict() if as_dict else target
|
|
|
|
def get_rev_forecast(self, proxy=None, as_dict=False):
    """
    Return the analysis scraper's ``rev_est`` data, converted with
    ``to_dict()`` when ``as_dict`` is true.
    """
    analysis = self._analysis
    analysis.proxy = proxy or self.proxy
    forecast = analysis.rev_est
    return forecast.to_dict() if as_dict else forecast
|
|
|
|
def get_earnings_forecast(self, proxy=None, as_dict=False):
    """
    Return the analysis scraper's ``eps_est`` data, converted with
    ``to_dict()`` when ``as_dict`` is true.
    """
    analysis = self._analysis
    analysis.proxy = proxy or self.proxy
    forecast = analysis.eps_est
    return forecast.to_dict() if as_dict else forecast
|
|
|
|
def get_trend_details(self, proxy=None, as_dict=False):
    """
    Return the analysis scraper's ``analyst_trend_details`` data, converted
    with ``to_dict()`` when ``as_dict`` is true.
    """
    analysis = self._analysis
    analysis.proxy = proxy or self.proxy
    details = analysis.analyst_trend_details
    return details.to_dict() if as_dict else details
|
|
|
|
def get_earnings_trend(self, proxy=None, as_dict=False):
    """
    Return the analysis scraper's ``earnings_trend`` data, converted with
    ``to_dict()`` when ``as_dict`` is true.
    """
    analysis = self._analysis
    analysis.proxy = proxy or self.proxy
    trend = analysis.earnings_trend
    return trend.to_dict() if as_dict else trend
|
|
|
|
def get_earnings(self, proxy=None, as_dict=False, freq="yearly"):
    """
    :Parameters:
        as_dict: bool
            Return table as Python dict
            Default is False
        freq: str
            "yearly" or "quarterly"
            Default is "yearly"
        proxy: str
            Optional. Proxy server URL scheme
            Default is None

    When ``as_dict`` is true the dict also carries a 'financialCurrency'
    entry, taken from ``self._earnings`` when available and 'USD' otherwise.
    """
    self._fundamentals.proxy = proxy or self.proxy
    data = self._fundamentals.earnings[freq]
    if not as_dict:
        return data

    dict_data = data.to_dict()
    # self._earnings starts out as None, and a membership test on None
    # raises TypeError, so treat "not populated" as "no currency known".
    earnings_meta = self._earnings
    if earnings_meta is not None and 'financialCurrency' in earnings_meta:
        dict_data['financialCurrency'] = earnings_meta['financialCurrency']
    else:
        dict_data['financialCurrency'] = 'USD'
    return dict_data
|
|
|
|
def get_income_stmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
    """
    :Parameters:
        as_dict: bool
            Return table as Python dict
            Default is False
        pretty: bool
            Format row names nicely for readability
            Default is False
        freq: str
            "yearly" or "quarterly"
            Default is "yearly"
        proxy: str
            Optional. Proxy server URL scheme
            Default is None (falls back to the instance proxy)
    """
    # Resolve the proxy once so the fetch call sees the same value that is
    # stored on the scraper, instead of a bare None when only the instance
    # proxy was configured.
    proxy = proxy or self.proxy
    self._fundamentals.proxy = proxy

    data = self._fundamentals.financials.get_income_time_series(freq=freq, proxy=proxy)

    if pretty:
        # Work on a copy so the table held by the scraper keeps raw row names.
        data = data.copy()
        data.index = utils.camel2title(data.index, sep=' ', acronyms=["EBIT", "EBITDA", "EPS", "NI"])
    if as_dict:
        return data.to_dict()
    return data
|
|
|
|
def get_incomestmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
    """Alias of get_income_stmt(); arguments are forwarded unchanged."""
    statement = self.get_income_stmt(proxy, as_dict, pretty, freq)
    return statement
|
|
|
|
def get_financials(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
    """Alias of get_income_stmt(); arguments are forwarded unchanged."""
    statement = self.get_income_stmt(proxy, as_dict, pretty, freq)
    return statement
|
|
|
|
def get_balance_sheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
    """
    :Parameters:
        as_dict: bool
            Return table as Python dict
            Default is False
        pretty: bool
            Format row names nicely for readability
            Default is False
        freq: str
            "yearly" or "quarterly"
            Default is "yearly"
        proxy: str
            Optional. Proxy server URL scheme
            Default is None (falls back to the instance proxy)
    """
    # Resolve the proxy once so the fetch call sees the same value that is
    # stored on the scraper, instead of a bare None when only the instance
    # proxy was configured.
    proxy = proxy or self.proxy
    self._fundamentals.proxy = proxy

    data = self._fundamentals.financials.get_balance_sheet_time_series(freq=freq, proxy=proxy)

    if pretty:
        # Work on a copy so the table held by the scraper keeps raw row names.
        data = data.copy()
        data.index = utils.camel2title(data.index, sep=' ', acronyms=["PPE"])
    if as_dict:
        return data.to_dict()
    return data
|
|
|
|
def get_balancesheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
    """Alias of get_balance_sheet(); arguments are forwarded unchanged."""
    sheet = self.get_balance_sheet(proxy, as_dict, pretty, freq)
    return sheet
|
|
|
|
def get_cash_flow(self, proxy=None, as_dict=False, pretty=False, freq="yearly") -> Union[pd.DataFrame, dict]:
    """
    :Parameters:
        as_dict: bool
            Return table as Python dict
            Default is False
        pretty: bool
            Format row names nicely for readability
            Default is False
        freq: str
            "yearly" or "quarterly"
            Default is "yearly"
        proxy: str
            Optional. Proxy server URL scheme
            Default is None (falls back to the instance proxy)
    """
    # Resolve the proxy once so the fetch call sees the same value that is
    # stored on the scraper, instead of a bare None when only the instance
    # proxy was configured.
    proxy = proxy or self.proxy
    self._fundamentals.proxy = proxy

    data = self._fundamentals.financials.get_cash_flow_time_series(freq=freq, proxy=proxy)

    if pretty:
        # Work on a copy so the table held by the scraper keeps raw row names.
        data = data.copy()
        data.index = utils.camel2title(data.index, sep=' ', acronyms=["PPE"])
    if as_dict:
        return data.to_dict()
    return data
|
|
|
|
def get_cashflow(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
    """Alias of get_cash_flow(); arguments are forwarded unchanged."""
    flow = self.get_cash_flow(proxy, as_dict, pretty, freq)
    return flow
|
|
|
|
def get_dividends(self, proxy=None) -> pd.Series:
    """Delegate to get_dividends() on the lazily created PriceHistory."""
    price_history = self._lazy_load_price_history()
    return price_history.get_dividends(proxy)
|
|
|
|
def get_capital_gains(self, proxy=None) -> pd.Series:
    """Delegate to get_capital_gains() on the lazily created PriceHistory."""
    price_history = self._lazy_load_price_history()
    return price_history.get_capital_gains(proxy)
|
|
|
|
def get_splits(self, proxy=None) -> pd.Series:
    """Delegate to get_splits() on the lazily created PriceHistory."""
    price_history = self._lazy_load_price_history()
    return price_history.get_splits(proxy)
|
|
|
|
def get_actions(self, proxy=None) -> pd.Series:
    """Delegate to get_actions() on the lazily created PriceHistory."""
    price_history = self._lazy_load_price_history()
    return price_history.get_actions(proxy)
|
|
|
|
def get_shares(self, proxy=None, as_dict=False) -> Union[pd.DataFrame, dict]:
    """
    Return the fundamentals scraper's ``shares`` data, converted with
    ``to_dict()`` when ``as_dict`` is true.
    """
    fundamentals = self._fundamentals
    fundamentals.proxy = proxy or self.proxy
    shares = fundamentals.shares
    return shares.to_dict() if as_dict else shares
|
|
|
|
@utils.log_indent_decorator
def get_shares_full(self, start=None, end=None, proxy=None):
    """
    Fetch the share-count time series from Yahoo's fundamentals-timeseries
    endpoint.

    :param start: start of the window, parsed with ``utils._parse_user_dt``.
        Defaults to 548 days (about 18 months) before ``end``.
    :param end: end of the window, parsed the same way. Defaults to now.
    :param proxy: proxy passed to the timezone lookup and the web request.
    :return: pandas Series of share counts sorted by timestamp, or None when
        the window is invalid, the request fails, or no share data is present.
    """
    logger = utils.get_yf_logger()

    # Process dates
    tz = self._get_ticker_tz(proxy=proxy, timeout=10)
    dt_now = pd.Timestamp.now("UTC").tz_convert(tz)
    if start is not None:
        start_ts = utils._parse_user_dt(start, tz)
        # Build the timestamp as UTC directly. Timestamp.fromtimestamp()
        # returns naive *local* time, so labelling that as UTC shifted the
        # window by the host machine's UTC offset.
        start = pd.Timestamp(start_ts, unit="s", tz="UTC").tz_convert(tz)
    if end is not None:
        end_ts = utils._parse_user_dt(end, tz)
        end = pd.Timestamp(end_ts, unit="s", tz="UTC").tz_convert(tz)
    if end is None:
        end = dt_now
    if start is None:
        start = end - pd.Timedelta(days=548)  # 18 months
    if start >= end:
        logger.error("Start date must be before end")
        return None
    start = start.floor("D")
    end = end.ceil("D")

    # Fetch
    ts_url_base = f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{self.ticker}?symbol={self.ticker}"
    shares_url = f"{ts_url_base}&period1={int(start.timestamp())}&period2={int(end.timestamp())}"
    try:
        json_data = self._data.cache_get(url=shares_url, proxy=proxy)
        json_data = json_data.json()
    except (_json.JSONDecodeError, requests.exceptions.RequestException):
        logger.error(f"{self.ticker}: Yahoo web request for share count failed")
        return None
    try:
        fail = json_data["finance"]["error"]["code"] == "Bad Request"
    except (KeyError, TypeError):
        # No error section, or an error section that is null.
        fail = False
    if fail:
        logger.error(f"{self.ticker}: Yahoo web request for share count failed")
        return None

    try:
        shares_data = json_data["timeseries"]["result"]
    except (KeyError, TypeError) as e:
        logger.error(f"{self.ticker}: Failed to parse shares count data: {e}")
        return None
    # An empty result list means there is nothing to index into.
    if not shares_data or "shares_out" not in shares_data[0]:
        return None
    try:
        df = pd.Series(shares_data[0]["shares_out"], index=pd.to_datetime(shares_data[0]["timestamp"], unit="s"))
    except Exception as e:
        logger.error(f"{self.ticker}: Failed to parse shares count data: {e}")
        return None

    df.index = df.index.tz_localize(tz)
    df = df.sort_index()
    return df
|
|
|
|
def get_isin(self, proxy=None) -> Optional[str]:
    """
    Look up the ISIN for this ticker via the Business Insider suggest
    endpoint (*** experimental ***).

    Returns the memoised value when one exists, '-' when the symbol contains
    '-' or '^' or no match is found in the response, and None when the quote
    info is unavailable.
    """
    if self._isin is not None:
        return self._isin

    symbol = self.ticker.upper()
    if any(ch in symbol for ch in "-^"):
        self._isin = '-'
        return self._isin

    self._quote.proxy = proxy or self.proxy
    info = self._quote.info
    if info is None:
        # Don't print error message cause self._quote.info will print one
        return None
    # Search by company short name when available, else by the symbol itself.
    query = info['shortName'] if "shortName" in info else symbol

    url = f'https://markets.businessinsider.com/ajax/SearchController_Suggest?max_results=25&query={urlencode(query)}'
    body = self._data.cache_get(url=url, proxy=proxy).text

    marker = f'"{symbol}|'
    if marker not in body:
        # Fall back to the first entry with an empty symbol field, but only
        # when the query text itself appears in the response.
        marker = '"|'
        if query.lower() not in body.lower() or marker not in body:
            self._isin = '-'
            return self._isin

    self._isin = body.split(marker)[1].split('"')[0].split('|')[0]
    return self._isin
|
|
|
|
def get_news(self, proxy=None) -> list:
    """
    Return the "news" entries from Yahoo's search endpoint for this ticker.

    A non-empty result is memoised on the instance and reused on later calls.
    Raises RuntimeError when the response contains Yahoo's outage banner.
    """
    if not self._news:
        # Getting data from json
        url = f"{_BASE_URL_}/v1/finance/search?q={self.ticker}"
        response = self._data.cache_get(url=url, proxy=proxy)
        if "Will be right back" in response.text:
            raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
                               "Our engineers are working quickly to resolve "
                               "the issue. Thank you for your patience.")
        # parse news
        self._news = response.json().get("news", [])
    return self._news
|
|
|
|
@utils.log_indent_decorator
def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]:
    """
    Get earning dates (future and historic)

    :param limit: max amount of upcoming and recent earnings dates to return.
                  Default value 12 should return next 4 quarters and last 8 quarters.
                  Increase if more history is needed.
    :param proxy: requests proxy to use.
    :return: pandas dataframe indexed by "Earnings Date", or None when no
             earnings dates were found. Results are memoised per ``limit``.
    """
    if self._earnings_dates and limit in self._earnings_dates:
        return self._earnings_dates[limit]

    logger = utils.get_yf_logger()

    page_size = min(limit, 100)  # YF caps at 100, don't go higher
    page_offset = 0
    dates = None
    while True:
        url = f"{_ROOT_URL_}/calendar/earnings?symbol={self.ticker}&offset={page_offset}&size={page_size}"
        html = self._data.cache_get(url=url, proxy=proxy).text

        if "Will be right back" in html:
            raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
                               "Our engineers are working quickly to resolve "
                               "the issue. Thank you for your patience.")

        try:
            page = pd.read_html(StringIO(html))[0]
        except ValueError:
            # Should not fail on first page
            if page_offset == 0 and "Showing Earnings for:" in html:
                # Actually YF was successful, problem is company doesn't have earnings history
                dates = utils.empty_earnings_dates_df()
            break

        dates = page if dates is None else pd.concat([dates, page], axis=0)

        page_offset += page_size
        # got less data then we asked for or already fetched all we requested, no need to fetch more pages
        if len(page) < page_size or len(dates) >= limit:
            dates = dates.iloc[:limit]
            break
        # do not fetch more than needed next time
        page_size = min(limit - len(dates), page_size)

    if dates is None or dates.shape[0] == 0:
        _exception = YFEarningsDateMissing(self.ticker)
        err_msg = str(_exception)
        logger.error(f'{self.ticker}: {err_msg}')
        return None

    # Renumber rows and drop redundant columns
    dates = dates.reset_index(drop=True).drop(["Symbol", "Company"], axis=1)

    # Convert types
    for numeric_col in ["EPS Estimate", "Reported EPS", "Surprise(%)"]:
        dates.loc[dates[numeric_col] == '-', numeric_col] = float("nan")
        dates[numeric_col] = dates[numeric_col].astype(float)

    # Convert % to range 0->1:
    dates["Surprise(%)"] *= 0.01

    # Parse earnings date string
    date_col = "Earnings Date"
    # - remove AM/PM and timezone from date string
    suffix = dates[date_col].str.extract('([AP]M[a-zA-Z]*)$')
    dates[date_col] = dates[date_col].replace(' [AP]M[a-zA-Z]*$', '', regex=True)
    # - split AM/PM from timezone
    suffix = suffix[0].str.extract('([AP]M)([a-zA-Z]*)', expand=True)
    suffix.columns = ["AM/PM", "TZ"]
    # - combine and parse
    dates[date_col] = dates[date_col] + ' ' + suffix["AM/PM"]
    dates[date_col] = pd.to_datetime(dates[date_col], format="%b %d, %Y, %I %p")
    # - instead of attempting decoding of ambiguous timezone abbreviation, just use 'info':
    self._quote.proxy = proxy or self.proxy
    tz = self._get_ticker_tz(proxy=proxy, timeout=30)
    dates[date_col] = dates[date_col].dt.tz_localize(tz)

    dates = dates.set_index("Earnings Date")

    self._earnings_dates[limit] = dates

    return dates
|
|
|
|
def get_history_metadata(self, proxy=None) -> dict:
    """Delegate to get_history_metadata() on the lazily created PriceHistory."""
    price_history = self._lazy_load_price_history()
    return price_history.get_history_metadata(proxy)
|