Examples
Basic Usage Examples
Simple Download
Download the latest Call Report data:
from ffiec_data_collector import FFIECDownloader, Product, FileFormat
downloader = FFIECDownloader()
result = downloader.download_latest(Product.CALL_SINGLE, FileFormat.XBRL)
if result.success:
print(f"Downloaded: {result.filename}")
print(f"Size: {result.size_bytes:,} bytes")
print(f"Location: {result.file_path}")
print(f"Data last updated: {result.last_updated}")
print(f"Call Reports updated: {result.call_updated}")
print(f"UBPR updated: {result.ubpr_updated}")
Download Specific Quarter
# Download Q1 2024 Call Report
result = downloader.download_cdr_single_period("20240331")
# Or using the full method
result = downloader.download(
product=Product.CALL_SINGLE,
period="20240331",
format=FileFormat.TSV
)
List Available Quarters
# Get available quarters for Call Reports
periods = downloader.select_product(Product.CALL_SINGLE)
print(f"Available quarters: {len(periods)}")
print(f"Latest: {periods[0].date_str}")
print(f"Earliest: {periods[-1].date_str}")
# Display recent quarters
for period in periods[:4]:
print(f" Q{period.quarter} {period.year}: {period.date_str}")
Working with Last Updated Dates
from datetime import date
# Download and check data freshness
result = downloader.download_latest(Product.CALL_SINGLE)
if result.success and result.last_updated:
# Calculate days since last update
today = date.today()
days_old = (today - result.last_updated).days
print(f"Data is {days_old} days old")
print(f"Last updated: {result.last_updated}")
print(f"Call Reports updated: {result.call_updated}")
print(f"UBPR data updated: {result.ubpr_updated}")
# Check if data is recent enough for your needs
if days_old > 90: # More than 90 days old
print("⚠️ Data may be stale")
else:
print("✓ Data is reasonably fresh")
# Format dates for display
print(f"Last updated (formatted): {result.last_updated.strftime('%B %d, %Y')}")
Compare Update Dates Across Products
from datetime import date, timedelta
# Download different product types and compare their update dates
products = [
(Product.CALL_SINGLE, "Call Report"),
(Product.UBPR_RATIO_SINGLE, "UBPR Ratios")
]
results = []
for product, name in products:
result = downloader.download_latest(product)
if result.success:
results.append((name, result))
print(f"{name}:")
print(f" Last updated: {result.last_updated}")
print(f" Days old: {(date.today() - result.last_updated).days}")
print(f" File: {result.filename}")
print()
# Find the most recently updated data
if results:
most_recent = max(results, key=lambda x: x[1].last_updated)
print(f"Most recently updated: {most_recent[0]} ({most_recent[1].last_updated})")
Data Freshness Filtering
from datetime import date, timedelta
def download_if_fresh(downloader, product, max_age_days=30):
"""Download data only if it's been updated recently"""
result = downloader.download_latest(product)
if not result.success:
return None, f"Download failed: {result.error_message}"
if not result.last_updated:
return result, "Warning: No last updated date available"
days_old = (date.today() - result.last_updated).days
if days_old > max_age_days:
return None, f"Data is {days_old} days old, exceeds {max_age_days} day limit"
return result, f"Data is fresh ({days_old} days old)"
# Use the function
result, message = download_if_fresh(downloader, Product.CALL_SINGLE, max_age_days=60)
print(message)
if result:
print(f"Downloaded: {result.filename}")
Advanced Examples
Bulk Download Multiple Quarters (With Rate Limiting)
from pathlib import Path
import time
downloader = FFIECDownloader(download_dir=Path("./bulk_downloads"))
# Download last 4 quarters with responsible rate limiting
quarters = ["20240331", "20231231", "20230930", "20230630"]
results = []
for quarter in quarters:
print(f"Downloading {quarter}...", end=" ")
result = downloader.download_cdr_single_period(quarter)
results.append(result)
if result.success:
print(f"✓ ({result.size_bytes:,} bytes)")
else:
print(f"✗ Failed: {result.error_message}")
# IMPORTANT: Be respectful to government servers
# Add delay between requests to avoid overloading the server
time.sleep(5) # 5 second delay - adjust as needed
# Summary
successful = sum(1 for r in results if r.success)
print(f"\nCompleted: {successful}/{len(results)} downloads")
Download to Memory for Processing
from io import BytesIO
import zipfile
import xml.etree.ElementTree as ET
# Download without saving to disk
content = downloader.download(
product=Product.CALL_SINGLE,
period="20240331",
format=FileFormat.XBRL,
save_to_disk=False
)
# Process ZIP content in memory
with zipfile.ZipFile(content) as zf:
print(f"ZIP contains {len(zf.filelist)} files")
# Process XBRL files
for file_info in zf.filelist:
if file_info.filename.endswith('.xml'):
with zf.open(file_info) as xml_file:
# Parse XBRL content
tree = ET.parse(xml_file)
root = tree.getroot()
print(f"Processing: {file_info.filename}")
# Add your XBRL processing logic here
Parallel Downloads
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
def download_quarter(downloader, quarter):
"""Download a single quarter"""
start = datetime.now()
result = downloader.download_cdr_single_period(quarter)
duration = (datetime.now() - start).total_seconds()
return quarter, result, duration
# Quarters to download
quarters = ["20240331", "20231231", "20230930", "20230630"]
# Download in parallel
with ThreadPoolExecutor(max_workers=3) as executor:
futures = {
executor.submit(download_quarter, downloader, q): q
for q in quarters
}
for future in as_completed(futures):
quarter, result, duration = future.result()
if result.success:
print(f"✓ {quarter}: {result.filename} ({duration:.1f}s)")
else:
print(f"✗ {quarter}: Failed after {duration:.1f}s")
Download Different Products
# Download UBPR data
products_to_download = [
(Product.UBPR_RATIO_SINGLE, "UBPR Ratios"),
(Product.UBPR_RANK_FOUR, "UBPR Rankings"),
(Product.UBPR_STATS_FOUR, "UBPR Statistics"),
]
quarter = "20240331"
for product, name in products_to_download:
print(f"Downloading {name} for {quarter}...")
result = downloader.download(product, quarter, FileFormat.XBRL)
if result.success:
print(f" ✓ Saved: {result.filename}")
print(f" Size: {result.size_bytes:,} bytes")
else:
print(f" ✗ Failed: {result.error_message}")
Validation Examples
Using Validated Downloader
from ffiec_data_collector import ValidatedFFIECDownloader
# This automatically checks website structure before downloading
validated_downloader = ValidatedFFIECDownloader()
try:
result = validated_downloader.download(
product=Product.CALL_SINGLE,
period="20240331",
format=FileFormat.XBRL
)
print(f"✓ Validation passed, downloaded: {result.filename}")
except WebpageChangeException as e:
print(f"✗ Website structure has changed: {e}")
print("Please update the library or contact support")
Manual Validation
from ffiec_data_collector import ThumbprintValidator
validator = ThumbprintValidator()
# Validate specific page
result = validator.validate(
"https://cdr.ffiec.gov/public/pws/downloadbulkdata.aspx",
"bulk_download"
)
if result["valid"]:
print("✓ Website structure is valid")
else:
print("✗ Website structure has changed")
if result.get("warnings"):
print("Warnings:")
for warning in result["warnings"]:
print(f" - {warning}")
Capture Website Thumbprint
# Capture current website structure
thumbprint = validator.capture_thumbprint(
"https://cdr.ffiec.gov/public/pws/downloadbulkdata.aspx",
"bulk_download"
)
print(f"Captured thumbprint:")
print(f" URL: {thumbprint.url}")
print(f" Timestamp: {thumbprint.timestamp}")
print(f" Hash: {thumbprint.structural_hash}")
print(f" ViewState present: {thumbprint.viewstate_present}")
print(f" Products: {len(thumbprint.products)} available")
# Save for future validation
from pathlib import Path
thumbprint.save(Path("./my_thumbprint.json"))
Error Handling
from ffiec_data_collector import WebpageChangeException
import requests
import logging
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def safe_download(downloader, product, period, format):
"""Download with comprehensive error handling"""
try:
logger.info(f"Downloading {product.display_name} for {period}")
result = downloader.download(product, period, format)
if result.success:
logger.info(f"Success: {result.filename} ({result.size_bytes:,} bytes)")
return result
else:
logger.error(f"Download failed: {result.error_message}")
return None
except WebpageChangeException as e:
logger.error(f"Website structure changed: {e}")
# Could notify administrators here
return None
except requests.exceptions.ConnectionError as e:
logger.error(f"Connection error: {e}")
return None
except requests.exceptions.Timeout as e:
logger.error(f"Request timeout: {e}")
return None
except Exception as e:
logger.error(f"Unexpected error: {e}")
return None
# Use the safe download function
result = safe_download(
downloader,
Product.CALL_SINGLE,
"20240331",
FileFormat.XBRL
)
Integration with Pandas
import pandas as pd
import zipfile
from io import StringIO
def load_tsv_to_dataframe(downloader, quarter):
"""Download TSV data and load into pandas DataFrame"""
# Download TSV format
content = downloader.download(
product=Product.CALL_SINGLE,
period=quarter,
format=FileFormat.TSV,
save_to_disk=False
)
# Extract TSV files from ZIP
dataframes = {}
with zipfile.ZipFile(content) as zf:
for file_info in zf.filelist:
if file_info.filename.endswith('.txt'):
with zf.open(file_info) as tsv_file:
# Read TSV content
tsv_content = tsv_file.read().decode('utf-8')
# Load into DataFrame
df = pd.read_csv(
StringIO(tsv_content),
sep='\t',
dtype=str # Keep all as strings initially
)
# Store with filename as key
name = file_info.filename.replace('.txt', '')
dataframes[name] = df
print(f"Loaded {name}: {len(df)} rows, {len(df.columns)} columns")
return dataframes
# Load Q1 2024 data
dfs = load_tsv_to_dataframe(downloader, "20240331")
# Work with the data
if 'Schedule_RI' in dfs: # Income Statement
income_df = dfs['Schedule_RI']
print(f"\nIncome Statement shape: {income_df.shape}")
print(income_df.head())
Jupyter Notebook Example
For a comprehensive Jupyter notebook example, see examples/ffiec_data_collection_demo.ipynb in the repository. The notebook includes:
Interactive data exploration
Visualization examples
Step-by-step walkthrough
Data processing workflows
Integration with common data science libraries