142 lines
5.6 KiB
Python
142 lines
5.6 KiB
Python
import requests
|
|
import logging
|
|
from time import sleep
|
|
from typing import List, Dict, Any, Optional
|
|
from datetime import datetime
|
|
|
|
class TranscriptCollector:
    """Fetch earnings-call transcripts from the financialmodelingprep.com API.

    Every HTTP call goes through ``_make_request``, which throttles, retries
    and keeps simple counters in ``self.stats``:

    * ``requests``   - number of ``_make_request`` calls (not HTTP attempts)
    * ``successful`` - calls that returned a decoded JSON payload
    * ``failed``     - individual attempts that raised (one call can add several)
    * ``empty``      - calls whose response body was empty

    All public methods swallow errors, log them, and return an empty
    result (``[]`` or ``None``) instead of raising.
    """

    def __init__(self, api_key: str, timeout: float = 30.0):
        """Store credentials and request settings; performs no I/O.

        Args:
            api_key: Key sent as the ``apikey`` query parameter on every call.
            timeout: Seconds to wait for the server before an attempt is
                treated as failed. Without a timeout a stalled connection
                would block the caller forever.
        """
        self.api_key = api_key
        self.base_url = "https://financialmodelingprep.com/api/v3"
        self.base_url_v4 = "https://financialmodelingprep.com/api/v4"
        self.rate_limit_pause = 0.5  # Increased from 0.2
        self.timeout = timeout
        self.stats = {
            'requests': 0,
            'successful': 0,
            'failed': 0,
            'empty': 0,
        }

    def _redact(self, text: str) -> str:
        """Return *text* with the API key masked so it never reaches the logs."""
        if not self.api_key:
            return text
        # Function-scope import keeps this block self-contained.
        from urllib.parse import quote_plus

        secret = str(self.api_key)
        # The key travels as a query parameter, so it may appear URL-encoded.
        for form in (quote_plus(secret), secret):
            text = text.replace(form, "***")
        return text

    @staticmethod
    def _to_int(value: Any) -> Optional[int]:
        """Coerce *value* to ``int``; return ``None`` when that is impossible."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _normalize_date_entry(entry: Any) -> Optional[Dict]:
        """Turn one transcript-date entry into a dict, or ``None`` if unusable."""
        if isinstance(entry, dict):
            return entry
        if isinstance(entry, (list, tuple)) and len(entry) >= 2:
            # NOTE(review): the v4 dates endpoint appears to return rows shaped
            # [quarter, year, date] - confirm against the API docs. The range
            # checks in get_transcript_dates reject rows in any other order.
            normalized = {'quarter': entry[0], 'year': entry[1]}
            if len(entry) >= 3:
                normalized['date'] = entry[2]
            return normalized
        return None

    def _make_request(self, url: str, params: Dict, max_retries: int = 3) -> Any:
        """GET *url* and return the decoded JSON body, retrying on HTTP errors.

        Args:
            url: Endpoint URL without the query string.
            params: Query parameters (including ``apikey``).
            max_retries: Maximum number of attempts.

        Returns:
            The decoded JSON payload, or ``[]`` when the body is empty, every
            attempt failed, an unexpected error occurred, or
            ``max_retries`` is not positive.
        """
        self.stats['requests'] += 1

        for attempt in range(max_retries):
            try:
                # Throttle before every attempt; the pause grows with retries.
                sleep(self.rate_limit_pause * (attempt + 1))  # Incremental backoff
                response = requests.get(url, params=params, timeout=self.timeout)
                response.raise_for_status()

                if not response.content:
                    self.stats['empty'] += 1
                    logging.warning(f"Empty response from {url}")
                    return []

                data = response.json()
                self.stats['successful'] += 1
                return data

            except requests.exceptions.RequestException as e:
                self.stats['failed'] += 1
                # The exception text can embed the full request URL, query
                # string included, so the key is masked before logging.
                logging.error(
                    f"API request error (attempt {attempt + 1}/{max_retries}) "
                    f"for {url}: {self._redact(str(e))}"
                )
                if attempt == max_retries - 1:  # Last attempt
                    return []
                sleep(2 ** attempt)  # Exponential backoff

            except Exception as e:
                # Anything else (e.g. an undecodable body) is not retried.
                self.stats['failed'] += 1
                logging.error(f"Unexpected error for {url}: {self._redact(str(e))}")
                return []

        # Reached only when max_retries <= 0; keep the "[] on failure" contract.
        return []

    def get_transcript_dates(self, ticker: str) -> List[Dict]:
        """Return the available transcript dates for *ticker*.

        Only entries whose year lies in ``1900..current year`` and whose
        quarter lies in ``1..4`` are kept. A malformed entry is skipped on
        its own and never discards the valid ones around it.

        Returns:
            A list of dicts with at least ``year`` and ``quarter`` keys;
            ``[]`` on any error.
        """
        try:
            # Dates come from the v4 endpoint; there is no fallback endpoint.
            params = {'apikey': self.api_key, 'symbol': ticker}
            response = self._make_request(f"{self.base_url_v4}/earning_call_transcript", params)

            if isinstance(response, dict):
                entries = [response]
            elif isinstance(response, list):
                entries = response
            else:
                return []

            current_year = datetime.now().year
            valid_dates = []
            for entry in entries:
                date_info = self._normalize_date_entry(entry)
                if date_info is None:
                    logging.warning(f"Invalid date info for {ticker}: {entry}")
                    continue
                try:
                    year = int(date_info.get('year', 0))
                    quarter = int(date_info.get('quarter', 0))
                except (TypeError, ValueError):
                    continue
                if 1900 <= year <= current_year and 1 <= quarter <= 4:
                    valid_dates.append(date_info)
                else:
                    logging.warning(f"Invalid date info for {ticker}: {date_info}")

            return valid_dates
        except Exception as e:
            logging.error(f"Error getting transcript dates for {ticker}: {e}")
            return []

    def get_transcript(self, ticker: str, year: int, quarter: int) -> Optional[Dict]:
        """Return the transcript for *ticker* in the given year and quarter.

        The batch endpoint for the whole year is tried first; if it holds no
        entry for the quarter, the single-transcript endpoint is queried.
        Quarters are compared as integers, so ``"3"`` and ``3`` match.

        Returns:
            The transcript dict, or ``None`` when nothing was found or an
            error occurred.
        """
        try:
            # Try batch endpoint first for better performance
            batch_transcripts = self.get_batch_transcripts(ticker, year)
            wanted = self._to_int(quarter)
            matching_transcript = next(
                (
                    t for t in batch_transcripts
                    if wanted is not None and self._to_int(t.get('quarter')) == wanted
                ),
                None,
            )
            if matching_transcript:
                return matching_transcript

            # Fallback to individual transcript endpoint
            params = {
                'apikey': self.api_key,
                'year': year,
                'quarter': quarter,
            }
            response = self._make_request(f"{self.base_url}/earning_call_transcript/{ticker}", params)

            # Only hand back real, non-empty dicts to honour Optional[Dict].
            if isinstance(response, list) and response and isinstance(response[0], dict):
                return response[0]
            if isinstance(response, dict) and response:
                return response

            logging.warning(f"No transcript found for {ticker} {year}Q{quarter}")
            return None
        except Exception as e:
            logging.error(f"Error getting transcript for {ticker} {year}Q{quarter}: {e}")
            return None

    def get_batch_transcripts(self, ticker: str, year: int) -> List[Dict]:
        """Return every valid transcript of *ticker* for *year*.

        A single-dict response is treated as a one-element list, so both
        response shapes pass through ``_validate_transcript``.

        Returns:
            The transcripts that pass validation; ``[]`` on any error.
        """
        try:
            params = {'apikey': self.api_key, 'year': year}
            response = self._make_request(f"{self.base_url_v4}/batch_earning_call_transcript/{ticker}", params)

            if isinstance(response, dict):
                response = [response]
            if isinstance(response, list):
                return [t for t in response if self._validate_transcript(t)]

            return []
        except Exception as e:
            logging.error(f"Error getting batch transcripts for {ticker} {year}: {e}")
            return []

    def _validate_transcript(self, transcript: Dict) -> bool:
        """Return True when *transcript* looks like a usable transcript.

        Requires non-empty ``content`` longer than 100 characters after
        stripping, a ``year`` in ``1900..current year`` and a ``quarter`` in
        ``1..4``. Anything malformed (non-dict input, non-text content,
        non-numeric year or quarter) yields ``False`` instead of raising.
        """
        try:
            content = transcript.get('content')
            if not content:
                return False

            year = int(transcript.get('year', 0))
            quarter = int(transcript.get('quarter', 0))

            return (1900 <= year <= datetime.now().year and
                    1 <= quarter <= 4 and
                    len(content.strip()) > 100)
        except (AttributeError, TypeError, ValueError, KeyError):
            # AttributeError covers non-dict transcripts and non-string content.
            return False

    def get_stats(self) -> Dict:
        """Return the live statistics dict (the same object as ``self.stats``)."""
        return self.stats
|