RivaCube/utils/Stocks/transcripts/collector.py
2025-02-04 19:31:18 +01:00

142 lines
5.6 KiB
Python

import requests
import logging
from time import sleep
from typing import List, Dict, Any, Optional
from datetime import datetime
class TranscriptCollector:
    """Collect earnings-call transcripts from the Financial Modeling Prep API.

    All public methods are best-effort: they log problems and return an
    empty result (``[]`` or ``None``) instead of raising.

    Counters kept in ``self.stats`` (one increment per ``_make_request`` call):
        requests   -- calls made to ``_make_request``
        successful -- calls that returned a decoded JSON payload
        empty      -- calls that received an empty HTTP body
        failed     -- calls that gave up without a usable payload
    so ``requests == successful + empty + failed`` always holds.
    """

    def __init__(self, api_key: str, timeout: float = 30.0):
        """Store credentials and initialise endpoints and counters.

        Args:
            api_key: API key sent as the ``apikey`` query parameter.
            timeout: Seconds to wait on each HTTP request before giving up.
        """
        self.api_key = api_key
        self.base_url = "https://financialmodelingprep.com/api/v3"
        self.base_url_v4 = "https://financialmodelingprep.com/api/v4"
        self.rate_limit_pause = 0.5  # Increased from 0.2
        # Without a timeout a stalled connection would block forever.
        self.timeout = timeout
        self.stats = {
            'requests': 0,
            'successful': 0,
            'failed': 0,
            'empty': 0
        }

    def _redact(self, text: str) -> str:
        """Return *text* with the API key masked so it never reaches the logs."""
        # requests embeds the full URL (query string included) in its
        # exception messages, which would otherwise leak the key.
        # Only the literal key is masked; a URL-encoded variant is not handled.
        if self.api_key:
            return text.replace(self.api_key, "***")
        return text

    @staticmethod
    def _period_in_range(year: int, quarter: int) -> bool:
        """Return True when *year* is 1900..current year and *quarter* is 1..4."""
        # NOTE(review): the upper bound is the current calendar year; confirm
        # the API never labels transcripts with a later fiscal year.
        return 1900 <= year <= datetime.now().year and 1 <= quarter <= 4

    @staticmethod
    def _normalize_date_entry(entry: Any) -> Optional[Dict]:
        """Convert one transcript-date entry to a dict, or None if unusable.

        Dict entries are returned unchanged. Positional entries are mapped
        to ``{'quarter', 'year', 'date'}``.
        """
        if isinstance(entry, dict):
            return entry
        # NOTE(review): assumes positional entries are [quarter, year, date];
        # confirm against the provider's documentation. A swapped order is
        # rejected later by the range check rather than silently accepted.
        if isinstance(entry, (list, tuple)) and len(entry) >= 2:
            return {
                'quarter': entry[0],
                'year': entry[1],
                'date': entry[2] if len(entry) > 2 else None,
            }
        return None

    @staticmethod
    def _quarter_matches(value: Any, quarter: Any) -> bool:
        """Compare quarters numerically so ``"2"`` and ``2`` are equal."""
        try:
            return int(value) == int(quarter)
        except (TypeError, ValueError):
            return value == quarter

    def _make_request(self, url: str, params: Dict, max_retries: int = 3) -> Any:
        """Make API request with retry logic.

        Args:
            url: Endpoint URL (without query string).
            params: Query parameters, including the API key.
            max_retries: Maximum number of attempts.

        Returns:
            The decoded JSON payload, or ``[]`` when the body is empty, all
            attempts fail, or ``max_retries`` is not positive.
        """
        self.stats['requests'] += 1
        for attempt in range(max_retries):
            try:
                sleep(self.rate_limit_pause * (attempt + 1))  # Incremental backoff
                response = requests.get(url, params=params, timeout=self.timeout)
                response.raise_for_status()
                if not response.content:
                    self.stats['empty'] += 1
                    logging.warning(f"Empty response from {url}")
                    return []
                data = response.json()
                self.stats['successful'] += 1
                return data
            except requests.exceptions.RequestException as e:
                logging.error(
                    f"API request error (attempt {attempt + 1}/{max_retries}) "
                    f"for {url}: {self._redact(str(e))}"
                )
                if attempt < max_retries - 1:
                    sleep(2 ** attempt)  # Exponential backoff
            except Exception as e:
                # Anything that is not a transport/HTTP error is not retried.
                logging.error(f"Unexpected error for {url}: {self._redact(str(e))}")
                break
        # Reached only when no attempt produced a payload; counted once per
        # call so the counters stay reconcilable with 'requests'.
        self.stats['failed'] += 1
        return []

    def get_transcript_dates(self, ticker: str) -> List[Dict]:
        """Get list of available transcript dates with validation.

        Args:
            ticker: Stock symbol to look up.

        Returns:
            Entries whose year and quarter pass the range check, as dicts.
            Malformed entries are skipped instead of discarding the whole
            result. Returns ``[]`` on any error.
        """
        try:
            params = {'apikey': self.api_key, 'symbol': ticker}
            response = self._make_request(f"{self.base_url_v4}/earning_call_transcript", params)
            if isinstance(response, dict):
                entries = [response]
            elif isinstance(response, list):
                entries = response
            else:
                return []

            valid_dates = []
            for entry in entries:
                date_info = self._normalize_date_entry(entry)
                if date_info is None:
                    logging.warning(f"Invalid date info for {ticker}: {entry}")
                    continue
                try:
                    year = int(date_info.get('year', 0))
                    quarter = int(date_info.get('quarter', 0))
                except (TypeError, ValueError):
                    continue
                if self._period_in_range(year, quarter):
                    valid_dates.append(date_info)
                else:
                    logging.warning(f"Invalid date info for {ticker}: {date_info}")
            return valid_dates
        except Exception as e:
            logging.error(f"Error getting transcript dates for {ticker}: {e}")
            return []

    def get_transcript(self, ticker: str, year: int, quarter: int) -> Optional[Dict]:
        """Get specific earnings call transcript with fallback strategy.

        The batch endpoint for *year* is tried first; if it has no entry for
        *quarter*, the per-transcript endpoint is queried.

        Args:
            ticker: Stock symbol.
            year: Year of the earnings call.
            quarter: Quarter of the earnings call (1-4).

        Returns:
            The transcript dict, or ``None`` when nothing usable was found.
        """
        try:
            # Try batch endpoint first for better performance
            batch_transcripts = self.get_batch_transcripts(ticker, year)
            matching_transcript = next(
                (t for t in batch_transcripts
                 if self._quarter_matches(t.get('quarter'), quarter)),
                None
            )
            if matching_transcript:
                return matching_transcript

            # Fallback to individual transcript endpoint
            params = {
                'apikey': self.api_key,
                'year': year,
                'quarter': quarter
            }
            response = self._make_request(f"{self.base_url}/earning_call_transcript/{ticker}", params)
            if isinstance(response, list) and response:
                candidate = response[0]
            elif isinstance(response, dict):
                candidate = response
            else:
                candidate = None

            # A payload with no 'content' (e.g. an error object) is not a
            # transcript and must not be handed to callers as one.
            if isinstance(candidate, dict) and candidate.get('content'):
                return candidate

            logging.warning(f"No transcript found for {ticker} {year}Q{quarter}")
            return None
        except Exception as e:
            logging.error(f"Error getting transcript for {ticker} {year}Q{quarter}: {e}")
            return None

    def get_batch_transcripts(self, ticker: str, year: int) -> List[Dict]:
        """Get all transcripts for a specific year with validation.

        Args:
            ticker: Stock symbol.
            year: Year whose transcripts are requested.

        Returns:
            Transcripts that pass ``_validate_transcript``; ``[]`` on error.
        """
        try:
            params = {'apikey': self.api_key, 'year': year}
            response = self._make_request(f"{self.base_url_v4}/batch_earning_call_transcript/{ticker}", params)
            if isinstance(response, dict):
                candidates = [response]
            elif isinstance(response, list):
                candidates = response
            else:
                return []
            # A single-dict response is validated exactly like list entries.
            return [t for t in candidates if self._validate_transcript(t)]
        except Exception as e:
            logging.error(f"Error getting batch transcripts for {ticker} {year}: {e}")
            return []

    def _validate_transcript(self, transcript: Dict) -> bool:
        """Validate transcript data.

        A transcript is valid when it is a dict whose ``content`` is a string
        longer than 100 characters after stripping, and whose ``year`` and
        ``quarter`` pass the range check. Never raises.
        """
        if not isinstance(transcript, dict):
            return False
        content = transcript.get('content')
        if not isinstance(content, str) or len(content.strip()) <= 100:
            return False
        try:
            year = int(transcript.get('year', 0))
            quarter = int(transcript.get('quarter', 0))
        except (TypeError, ValueError):
            return False
        return self._period_in_range(year, quarter)

    def get_stats(self) -> Dict:
        """Get collector statistics (the live counter dict, not a copy)."""
        return self.stats