Examples

Basic Usage Examples

Simple Download

Download the latest Call Report data:

from ffiec_data_collector import FFIECDownloader, Product, FileFormat

# Create a downloader and request the most recent Call Report in XBRL format
downloader = FFIECDownloader()
result = downloader.download_latest(Product.CALL_SINGLE, FileFormat.XBRL)

if result.success:
    print(f"Downloaded: {result.filename}")
    print(f"Size: {result.size_bytes:,} bytes")
    print(f"Location: {result.file_path}")
    print(f"Data last updated: {result.last_updated}")
    print(f"Call Reports updated: {result.call_updated}")
    print(f"UBPR updated: {result.ubpr_updated}")
else:
    # Report the failure rather than finishing silently
    print(f"Download failed: {result.error_message}")

Download Specific Quarter

# Q1 2024 Call Report through the single-period convenience method
result = downloader.download_cdr_single_period("20240331")

# The same quarter through the general-purpose method, here requesting TSV
# (arguments are product, period, format)
result = downloader.download(Product.CALL_SINGLE, "20240331", FileFormat.TSV)

List Available Quarters

# Get available quarters for Call Reports
periods = downloader.select_product(Product.CALL_SINGLE)

print(f"Available quarters: {len(periods)}")

# Indexing an empty list raises IndexError, so only show details when
# at least one period came back
if periods:
    print(f"Latest: {periods[0].date_str}")
    print(f"Earliest: {periods[-1].date_str}")

    # Display recent quarters
    for period in periods[:4]:
        print(f"  Q{period.quarter} {period.year}: {period.date_str}")

Working with Last Updated Dates

from datetime import date

# Fetch the latest data, then work out how old it is
result = downloader.download_latest(Product.CALL_SINGLE)

if result.success and result.last_updated:
    # Age of the data in whole days
    days_old = (date.today() - result.last_updated).days

    print(f"Data is {days_old} days old")
    print(f"Last updated: {result.last_updated}")
    print(f"Call Reports updated: {result.call_updated}")
    print(f"UBPR data updated: {result.ubpr_updated}")

    # Anything older than 90 days is flagged as potentially stale
    is_stale = days_old > 90
    print("⚠️  Data may be stale" if is_stale else "✓ Data is reasonably fresh")

    # Format dates for display
    print(f"Last updated (formatted): {result.last_updated.strftime('%B %d, %Y')}")

Compare Update Dates Across Products

from datetime import date, timedelta

# Download different product types and compare their update dates
products = [
    (Product.CALL_SINGLE, "Call Report"),
    (Product.UBPR_RATIO_SINGLE, "UBPR Ratios")
]

# Only results that carry a last-updated date are kept for the comparison
results = []
for product, name in products:
    result = downloader.download_latest(product)
    if not result.success:
        print(f"{name}: download failed: {result.error_message}")
        print()
        continue

    print(f"{name}:")
    if result.last_updated:
        results.append((name, result))
        print(f"  Last updated: {result.last_updated}")
        print(f"  Days old: {(date.today() - result.last_updated).days}")
    else:
        # Without a date the age cannot be computed and max() below could not compare it
        print("  Last updated: not available")
    print(f"  File: {result.filename}")
    print()

# Find the most recently updated data
if results:
    most_recent = max(results, key=lambda x: x[1].last_updated)
    print(f"Most recently updated: {most_recent[0]} ({most_recent[1].last_updated})")

Data Freshness Filtering

from datetime import date, timedelta

def download_if_fresh(downloader, product, max_age_days=30):
    """Fetch the latest data for *product* and accept it only if it is recent.

    Returns a ``(result, message)`` pair. ``result`` is ``None`` when the
    download failed or the data is older than ``max_age_days``; otherwise it
    is the download result. ``message`` describes the outcome.
    """
    outcome = downloader.download_latest(product)

    if not outcome.success:
        return None, f"Download failed: {outcome.error_message}"

    updated_on = outcome.last_updated
    if not updated_on:
        # Freshness cannot be judged without a date; return the result with a warning
        return outcome, "Warning: No last updated date available"

    age = (date.today() - updated_on).days
    if age > max_age_days:
        return None, f"Data is {age} days old, exceeds {max_age_days} day limit"
    return outcome, f"Data is fresh ({age} days old)"

# Try the helper with a 60-day freshness window
result, message = download_if_fresh(downloader, Product.CALL_SINGLE, 60)
print(message)

# result is None when the download failed or the data was too old
if result:
    print(f"Downloaded: {result.filename}")

Advanced Examples

Bulk Download Multiple Quarters (With Rate Limiting)

from pathlib import Path
import time

downloader = FFIECDownloader(download_dir=Path("./bulk_downloads"))

# Download last 4 quarters with responsible rate limiting
quarters = ["20240331", "20231231", "20230930", "20230630"]
results = []

for index, quarter in enumerate(quarters):
    if index:
        # IMPORTANT: Be respectful to government servers
        # Pause between requests to avoid overloading the server; waiting
        # before every request except the first avoids a pointless delay
        # after the final download
        time.sleep(5)  # 5 second delay - adjust as needed

    # flush=True makes the progress text visible before the download starts,
    # since the line has no trailing newline yet
    print(f"Downloading {quarter}...", end=" ", flush=True)
    result = downloader.download_cdr_single_period(quarter)
    results.append(result)

    if result.success:
        print(f"✓ ({result.size_bytes:,} bytes)")
    else:
        print(f"✗ Failed: {result.error_message}")

# Summary
successful = sum(1 for r in results if r.success)
print(f"\nCompleted: {successful}/{len(results)} downloads")

Download to Memory for Processing

from io import BytesIO
import zipfile
import xml.etree.ElementTree as ET

# Fetch the archive without writing it to disk
content = downloader.download(
    Product.CALL_SINGLE,
    "20240331",
    FileFormat.XBRL,
    save_to_disk=False,
)

# Walk the ZIP archive directly from the in-memory content
with zipfile.ZipFile(content) as archive:
    members = archive.infolist()
    print(f"ZIP contains {len(members)} files")

    # Only the XML members are XBRL documents
    for member in members:
        if not member.filename.endswith('.xml'):
            continue
        with archive.open(member) as xml_file:
            # Parse XBRL content
            tree = ET.parse(xml_file)
            root = tree.getroot()
            print(f"Processing: {member.filename}")
            # Add your XBRL processing logic here

Parallel Downloads

from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

def download_quarter(downloader, quarter):
    """Download a single quarter and time how long it takes.

    Returns a ``(quarter, result, duration)`` tuple where ``duration`` is the
    elapsed time in seconds as a float. Exceptions raised by the download
    propagate to the caller.
    """
    # Local import keeps this snippet self-contained
    import time

    # perf_counter is monotonic, so the measurement is unaffected by
    # system clock adjustments (unlike datetime.now())
    started = time.perf_counter()
    result = downloader.download_cdr_single_period(quarter)
    duration = time.perf_counter() - started
    return quarter, result, duration

# Quarters to download
quarters = ["20240331", "20231231", "20230930", "20230630"]

# Download in parallel. Keep max_workers low so the server is not hit with
# many simultaneous requests.
# NOTE(review): this shares one downloader across threads - confirm that it
# is safe to use concurrently.
with ThreadPoolExecutor(max_workers=3) as executor:
    # Map each future back to its quarter so failures can be attributed
    futures = {
        executor.submit(download_quarter, downloader, q): q
        for q in quarters
    }

    for future in as_completed(futures):
        try:
            quarter, result, duration = future.result()
        except Exception as exc:
            # future.result() re-raises whatever the worker raised; report it
            # and carry on so one bad quarter does not hide the other results
            print(f"✗ {futures[future]}: Raised {exc!r}")
            continue

        if result.success:
            print(f"✓ {quarter}: {result.filename} ({duration:.1f}s)")
        else:
            print(f"✗ {quarter}: Failed after {duration:.1f}s")

Download Different Products

# UBPR products to fetch, paired with a label for the progress output
products_to_download = [
    (Product.UBPR_RATIO_SINGLE, "UBPR Ratios"),
    (Product.UBPR_RANK_FOUR, "UBPR Rankings"),
    (Product.UBPR_STATS_FOUR, "UBPR Statistics"),
]

quarter = "20240331"

for product, name in products_to_download:
    print(f"Downloading {name} for {quarter}...")
    result = downloader.download(product, quarter, FileFormat.XBRL)

    # Handle the failure first, then fall through to the success report
    if not result.success:
        print(f"  ✗ Failed: {result.error_message}")
        continue

    print(f"  ✓ Saved: {result.filename}")
    print(f"  Size: {result.size_bytes:,} bytes")

Validation Examples

Using Validated Downloader

# WebpageChangeException must be imported too: it is named in the except clause below
from ffiec_data_collector import ValidatedFFIECDownloader, WebpageChangeException

# This automatically checks website structure before downloading
validated_downloader = ValidatedFFIECDownloader()

try:
    result = validated_downloader.download(
        product=Product.CALL_SINGLE,
        period="20240331",
        format=FileFormat.XBRL
    )
except WebpageChangeException as e:
    print(f"✗ Website structure has changed: {e}")
    print("Please update the library or contact support")
else:
    # Passing validation does not guarantee the download itself succeeded
    if result.success:
        print(f"✓ Validation passed, downloaded: {result.filename}")
    else:
        print(f"✗ Validation passed, but download failed: {result.error_message}")

Manual Validation

from ffiec_data_collector import ThumbprintValidator

validator = ThumbprintValidator()

# Check the bulk download page against its recorded structure
page_url = "https://cdr.ffiec.gov/public/pws/downloadbulkdata.aspx"
result = validator.validate(page_url, "bulk_download")

print("✓ Website structure is valid" if result["valid"] else "✗ Website structure has changed")

# Warnings are optional in the result, so look them up defensively
reported_warnings = result.get("warnings")
if reported_warnings:
    print("Warnings:")
    for warning in reported_warnings:
        print(f"  - {warning}")

Capture Website Thumbprint

# Record the current structure of the bulk download page
page_url = "https://cdr.ffiec.gov/public/pws/downloadbulkdata.aspx"
thumbprint = validator.capture_thumbprint(page_url, "bulk_download")

print("Captured thumbprint:")
print(f"  URL: {thumbprint.url}")
print(f"  Timestamp: {thumbprint.timestamp}")
print(f"  Hash: {thumbprint.structural_hash}")
print(f"  ViewState present: {thumbprint.viewstate_present}")
print(f"  Products: {len(thumbprint.products)} available")

# Persist the thumbprint so later runs can validate against it
from pathlib import Path
thumbprint_path = Path("./my_thumbprint.json")
thumbprint.save(thumbprint_path)

Error Handling

from ffiec_data_collector import WebpageChangeException
import requests
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def safe_download(downloader, product, period, format):
    """Download with comprehensive error handling.

    Calls ``downloader.download(product, period, format)`` and logs the
    outcome. Returns the download result on success, or ``None`` when the
    download reports failure or any exception is raised.
    """
    try:
        logger.info(f"Downloading {product.display_name} for {period}")
        result = downloader.download(product, period, format)

        if result.success:
            logger.info(f"Success: {result.filename} ({result.size_bytes:,} bytes)")
            return result

        logger.error(f"Download failed: {result.error_message}")
        return None

    except WebpageChangeException as e:
        logger.error(f"Website structure changed: {e}")
        # Could notify administrators here
        return None

    except requests.exceptions.ConnectionError as e:
        logger.error(f"Connection error: {e}")
        return None

    except requests.exceptions.Timeout as e:
        logger.error(f"Request timeout: {e}")
        return None

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which is what makes an unexpected failure diagnosable
        logger.exception(f"Unexpected error: {e}")
        return None

# Fetch the Q1 2024 Call Report (XBRL) through the error-handling wrapper;
# result is None if anything went wrong
result = safe_download(downloader, Product.CALL_SINGLE, "20240331", FileFormat.XBRL)

Integration with Pandas

import pandas as pd
import zipfile
from io import StringIO

def load_tsv_to_dataframe(downloader, quarter):
    """Download TSV data and load into pandas DataFrames.

    Downloads the Call Report TSV archive for ``quarter`` without saving it
    to disk and reads every ``.txt`` member of the ZIP as tab-separated data.

    Returns a dict mapping each member's file name (without the ``.txt``
    extension) to a DataFrame whose columns are all read as ``str``.
    """
    # Download TSV format
    content = downloader.download(
        product=Product.CALL_SINGLE,
        period=quarter,
        format=FileFormat.TSV,
        save_to_disk=False
    )

    suffix = '.txt'
    dataframes = {}

    # Extract TSV files from ZIP
    with zipfile.ZipFile(content) as zf:
        for file_info in zf.infolist():
            if not file_info.filename.endswith(suffix):
                continue

            with zf.open(file_info) as tsv_file:
                # Hand the archive member straight to pandas instead of
                # reading, decoding and re-wrapping the whole file in memory
                df = pd.read_csv(
                    tsv_file,
                    sep='\t',
                    dtype=str,  # Keep all as strings initially
                    encoding='utf-8'
                )

            # Strip only the trailing extension; str.replace would also
            # remove a '.txt' occurring earlier in the name
            name = file_info.filename[:-len(suffix)]
            dataframes[name] = df

            print(f"Loaded {name}: {len(df)} rows, {len(df.columns)} columns")

    return dataframes

# Load every schedule for Q1 2024
dfs = load_tsv_to_dataframe(downloader, "20240331")

# Pick out the income statement schedule, if the archive contained it
income_df = dfs.get('Schedule_RI')
if income_df is not None:
    print(f"\nIncome Statement shape: {income_df.shape}")
    print(income_df.head())

Jupyter Notebook Example

For a comprehensive Jupyter notebook example, see examples/ffiec_data_collection_demo.ipynb in the repository. The notebook includes:

- Interactive data exploration
- Visualization examples
- Step-by-step walkthrough
- Data processing workflows
- Integration with common data science libraries