# utils/stocks/collector.py
import logging
import time
from datetime import datetime, date
from threading import Lock
from typing import Any, Dict, List, Tuple

import psycopg2
import requests
from psycopg2.extras import execute_values


class RateLimiter:
    """Simple sliding-window rate limiter, safe to share across threads."""

    def __init__(self, calls_per_minute: int):
        self.calls_per_minute = calls_per_minute
        self.calls: List[float] = []
        self.lock = Lock()

    def wait_if_needed(self):
        """Sleep until a slot opens if the per-minute limit is exhausted."""
        now = time.time()
        minute_ago = now - 60

        with self.lock:
            # Drop timestamps that have aged out of the 60-second window.
            self.calls = [call for call in self.calls if call > minute_ago]

            if len(self.calls) >= self.calls_per_minute:
                # Wait until the oldest recorded call leaves the window.
                sleep_time = self.calls[0] - minute_ago
                if sleep_time > 0:
                    time.sleep(sleep_time)
                now = time.time()  # refresh so the recorded timestamp is accurate

            self.calls.append(now)


class StockDataClient:
    def __init__(self, api_key: str, rate_limit_per_minute: int = 300):
        self.api_key = api_key
        self.base_url = "https://financialmodelingprep.com/api/v3"
        self.rate_limiter = RateLimiter(rate_limit_per_minute)
        self.session = requests.Session()

    def get_historical_price(
        self, symbol: str, from_date: date, retries: int = 3
    ) -> List[Dict]:
        """Fetch historical daily price data for a symbol."""
        self.rate_limiter.wait_if_needed()

        try:
            url = f"{self.base_url}/historical-price-full/{symbol}"
            params = {
                'apikey': self.api_key,
                'from': from_date.strftime('%Y-%m-%d'),
                'to': datetime.now().date().strftime('%Y-%m-%d'),
            }

            response = self.session.get(url, params=params)
            response.raise_for_status()

            data = response.json()
            if 'historical' not in data:
                logging.warning(f"No historical data found for {symbol}")
                return []

            return data['historical']

        except requests.exceptions.RequestException as e:
            logging.error(f"Error fetching data for {symbol}: {e}")
            # Back off and retry on rate-limit responses. e.response is None
            # for connection-level failures, so guard against that, and cap
            # the retry count to avoid unbounded recursion on persistent 429s.
            if (
                e.response is not None
                and e.response.status_code == 429
                and retries > 0
            ):
                time.sleep(5)
                return self.get_historical_price(symbol, from_date, retries - 1)
            return []


def deduplicate_stock_data(data: List[Tuple]) -> List[Tuple]:
    """Deduplicate stock data on (ticker_id, date), keeping the first row."""
    seen = set()
    deduped_data = []
    for item in data:
        key = (item[0], item[1])  # ticker_id and date
        if key not in seen:
            seen.add(key)
            deduped_data.append(item)
    return deduped_data


def batch_insert_stocks(cursor: psycopg2.extensions.cursor, data: List[Tuple]) -> int:
    """Bulk upsert stock data with deduplication.

    Returns the number of rows sent, counting updated rows as well as inserts.
    """
    if not data:
        return 0

    try:
        # Deduplicate so a single statement never touches the same
        # (ticker_id, date) twice, which would make the upsert fail.
        deduped_data = deduplicate_stock_data(data)

        # Insert in smaller batches to keep each statement a manageable size.
        batch_size = 500
        total_inserted = 0

        for i in range(0, len(deduped_data), batch_size):
            batch = deduped_data[i:i + batch_size]
            execute_values(cursor, """
                INSERT INTO stocks
                    (ticker_id, date, open, high, low, close, adj_close, volume)
                VALUES %s
                ON CONFLICT (ticker_id, date) DO UPDATE SET
                    open = EXCLUDED.open,
                    high = EXCLUDED.high,
                    low = EXCLUDED.low,
                    close = EXCLUDED.close,
                    adj_close = EXCLUDED.adj_close,
                    volume = EXCLUDED.volume
            """, batch)
            total_inserted += len(batch)

        return total_inserted

    except Exception as e:
        logging.error(f"Error in batch insert: {e}")
        raise


def process_ticker_batch(
    tickers: List[Dict],
    client: StockDataClient,
    connection: psycopg2.extensions.connection
) -> int:
    """Process a batch of tickers, committing every ~1000 accumulated rows."""
    cursor = connection.cursor()
    records_processed = 0
    data_to_insert = []

    try:
        for ticker in tickers:
            try:
                historical_data = client.get_historical_price(
                    ticker['ticker'],
                    ticker['last_update']
                )

                batch_data = [
                    (ticker['id'],
                     datetime.strptime(day['date'], '%Y-%m-%d').date(),
                     day.get('open'),
                     day.get('high'),
                     day.get('low'),
                     day.get('close'),
                     day.get('adjClose', day.get('close')),
                     day.get('volume'))
                    for day in historical_data
                ]

                if batch_data:
                    data_to_insert.extend(batch_data)

                    # Flush in chunks so one failed commit only loses a
                    # single batch instead of the whole run.
                    if len(data_to_insert) >= 1000:
                        try:
                            inserted = batch_insert_stocks(cursor, data_to_insert)
                            connection.commit()
                            records_processed += inserted
                        except Exception as e:
                            connection.rollback()
                            logging.error(f"Error processing batch for {ticker['ticker']}: {e}")
                        # Clear the buffer either way so a failed batch is
                        # not retried verbatim in the final flush below.
                        data_to_insert = []

            except Exception as e:
                logging.error(f"Error processing {ticker['ticker']}: {e}")
                continue

        # Insert any remaining records
        if data_to_insert:
            try:
                inserted = batch_insert_stocks(cursor, data_to_insert)
                connection.commit()
                records_processed += inserted
            except Exception as e:
                connection.rollback()
                logging.error(f"Error processing final batch: {e}")

        return records_processed

    finally:
        cursor.close()


def update_stocks_batch(
    tickers: List[Dict],
    connection: psycopg2.extensions.connection,
    client: StockDataClient,
    batch_size: int = 50
) -> int:
    """Update stocks in batches of tickers."""
    total_records = 0

    for i in range(0, len(tickers), batch_size):
        batch = tickers[i:i + batch_size]
        try:
            records = process_ticker_batch(batch, client, connection)
            total_records += records
            logging.info(f"Processed batch {i//batch_size + 1}, records: {records}")
        except Exception as e:
            logging.error(f"Error processing batch {i//batch_size + 1}: {e}")
            continue

    return total_records


def get_stock_statistics(connection: psycopg2.extensions.connection) -> Dict[str, Any]:
    """Collect row counts and freshness metrics for the stocks tables."""
    cursor = None
    try:
        cursor = connection.cursor()
        stats = {}

        cursor.execute("""
            SELECT
                (SELECT COUNT(*) FROM stocks) AS stocks_count,
                (SELECT COUNT(*) FROM tickers) AS tickers_count,
                (SELECT MAX(date) FROM stocks) AS latest_date,
                (SELECT COUNT(DISTINCT ticker_id) FROM stocks
                 WHERE date >= CURRENT_DATE - INTERVAL '7 days') AS recent_updates
        """)

        row = cursor.fetchone()
        if row:
            stats['stocks_count'] = row[0]
            stats['tickers_count'] = row[1]
            stats['latest_stock_date'] = row[2]
            stats['recently_updated_tickers'] = row[3]

        return stats

    except Exception as e:
        logging.error(f"Error getting stock statistics: {e}")
        return {}
    finally:
        if cursor:
            cursor.close()


def track_invalid_ticker(connection: psycopg2.extensions.connection, ticker: str, reason: str):
    """Record a ticker that failed validation, bumping its attempt counter."""
    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute("""
            INSERT INTO invalid_tickers (ticker, reason)
            VALUES (%s, %s)
            ON CONFLICT (ticker) DO UPDATE SET
                attempts = invalid_tickers.attempts + 1,
                last_check = CURRENT_TIMESTAMP
        """, (ticker, reason))
        connection.commit()
    finally:
        if cursor:
            cursor.close()
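

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the original module). It
# wires the pieces above together end to end. The connection DSN, the
# FMP_API_KEY environment variable, and the tickers-table columns
# (id, ticker, last_update) are assumptions inferred from the keys that
# process_ticker_batch reads from each ticker dict, not a confirmed schema.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import os

    logging.basicConfig(level=logging.INFO)

    # Assumed DSN; adjust for your environment.
    connection = psycopg2.connect("dbname=stocks user=postgres")
    client = StockDataClient(api_key=os.environ["FMP_API_KEY"])

    cursor = connection.cursor()
    try:
        # Assumed schema: each row supplies the keys process_ticker_batch expects.
        cursor.execute("SELECT id, ticker, last_update FROM tickers")
        tickers = [
            {'id': row[0], 'ticker': row[1], 'last_update': row[2]}
            for row in cursor.fetchall()
        ]
    finally:
        cursor.close()

    total = update_stocks_batch(tickers, connection, client)
    logging.info(f"Upserted {total} stock rows")
    logging.info(f"Statistics: {get_stock_statistics(connection)}")
    connection.close()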