Extract Financial Statements

Learn how to extract and work with financial statements from SEC filings using EdgarTools' powerful XBRL processing capabilities.

Prerequisites

Basic understanding of financial statements (balance sheet, income statement, cash flow)
Familiarity with finding companies and searching filings

Quick Start: Single Period Statements

Get Latest Financial Statements

The fastest way to get financial statements is using the Company.financials property:

from edgar import Company

# Get Apple's latest financials
company = Company("AAPL")
financials = company.get_financials()

# Access individual statements
balance_sheet = financials.balance_sheet
income_statement = financials.income_statement()
cash_flow = financials.cashflow_statement()

Alternative: From Specific Filing

For more control, extract statements from a specific filing:

from edgar import Company

# Get a specific filing
company = Company("AAPL")
filing = company.get_filings(form="10-K").latest()

# Parse XBRL data
xbrl = filing.xbrl()

# Access statements through the user-friendly API
statements = xbrl.statements

# Display financial statements
balance_sheet = statements.balance_sheet()
income_statement = statements.income_statement()
cash_flow = statements.cashflow_statement()

print(balance_sheet)  # Rich formatted output

Multi-Period Analysis

Method 1: Using MultiFinancials

Get financials across multiple years for trend analysis:

from edgar import Company, MultiFinancials

# Get multiple years of 10-K filings
company = Company("AAPL")
filings = company.get_filings(form="10-K").head(3)  # Last 3 annual reports

# Create multi-period financials
multi_financials = MultiFinancials(filings)

# Access statements spanning multiple years
balance_sheet = multi_financials.balance_sheet()
income_statement = multi_financials.income_statement()
cash_flow = multi_financials.cashflow_statement()

print("Multi-Year Income Statement:")
print(income_statement)

Method 2: Using XBRL Stitching

For more advanced multi-period analysis with intelligent period matching:

from edgar import Company
from edgar.xbrl import XBRLS

# Get multiple filings for trend analysis
company = Company("AAPL")
filings = company.get_filings(form="10-K").head(3)

# Create stitched view across multiple filings
xbrls = XBRLS.from_filings(filings)

# Access stitched statements
stitched_statements = xbrls.statements

# Display multi-period statements with intelligent period selection
income_trend = stitched_statements.income_statement()
balance_sheet_trend = stitched_statements.balance_sheet()
cashflow_trend = stitched_statements.cashflow_statement()

print("Three-Year Revenue Trend:")
revenue_trend = income_trend.to_dataframe()
revenue_row = revenue_trend.loc[revenue_trend['label'] == 'Revenue']
print(revenue_row)

Working with Individual Statements

Balance Sheet Analysis

# Get balance sheet
balance_sheet = statements.balance_sheet()

# Convert to DataFrame for analysis
bs_df = balance_sheet.to_dataframe()

# Extract key balance sheet items
total_assets = bs_df[bs_df['label'] == 'Total Assets']
total_liabilities = bs_df[bs_df['label'] == 'Total Liabilities']
shareholders_equity = bs_df[bs_df['label'] == "Total Stockholders' Equity"]

print("Balance Sheet Summary:")
print(f"Total Assets: ${total_assets.iloc[0, -1]/1e9:.1f}B")
print(f"Total Liabilities: ${total_liabilities.iloc[0, -1]/1e9:.1f}B")
print(f"Shareholders' Equity: ${shareholders_equity.iloc[0, -1]/1e9:.1f}B")

# Calculate debt-to-equity ratio
debt_to_equity = total_liabilities.iloc[0, -1] / shareholders_equity.iloc[0, -1]
print(f"Debt-to-Equity Ratio: {debt_to_equity:.2f}")

Income Statement Analysis

# Get income statement
income_statement = statements.income_statement()

# Convert to DataFrame
is_df = income_statement.to_dataframe()

# Extract key income statement items
revenue = is_df[is_df['label'] == 'Revenue']
gross_profit = is_df[is_df['label'] == 'Gross Profit']
operating_income = is_df[is_df['label'] == 'Operating Income']
net_income = is_df[is_df['label'] == 'Net Income']

print("Income Statement Analysis:")
print(f"Revenue: ${revenue.iloc[0, -1]/1e9:.1f}B")
print(f"Gross Profit: ${gross_profit.iloc[0, -1]/1e9:.1f}B")
print(f"Operating Income: ${operating_income.iloc[0, -1]/1e9:.1f}B")
print(f"Net Income: ${net_income.iloc[0, -1]/1e9:.1f}B")

# Calculate margins
gross_margin = (gross_profit.iloc[0, -1] / revenue.iloc[0, -1]) * 100
operating_margin = (operating_income.iloc[0, -1] / revenue.iloc[0, -1]) * 100
net_margin = (net_income.iloc[0, -1] / revenue.iloc[0, -1]) * 100

print(f"\nMargin Analysis:")
print(f"Gross Margin: {gross_margin:.1f}%")
print(f"Operating Margin: {operating_margin:.1f}%")
print(f"Net Margin: {net_margin:.1f}%")

Cash Flow Analysis

# Get cash flow statement
cash_flow = statements.cashflow_statement()

# Convert to DataFrame
cf_df = cash_flow.to_dataframe()

# Extract cash flow components
operating_cf = cf_df[cf_df['label'] == 'Net Cash from Operating Activities']
investing_cf = cf_df[cf_df['label'] == 'Net Cash from Investing Activities']
financing_cf = cf_df[cf_df['label'] == 'Net Cash from Financing Activities']

print("Cash Flow Analysis:")
print(f"Operating Cash Flow: ${operating_cf.iloc[0, -1]/1e9:.1f}B")
print(f"Investing Cash Flow: ${investing_cf.iloc[0, -1]/1e9:.1f}B")
print(f"Financing Cash Flow: ${financing_cf.iloc[0, -1]/1e9:.1f}B")

# Calculate free cash flow (Operating CF - Capital Expenditures)
capex = cf_df[cf_df['label'].str.contains('Capital Expenditures', case=False, na=False)]
if not capex.empty:
    free_cash_flow = operating_cf.iloc[0, -1] + capex.iloc[0, -1]  # CapEx is usually negative
    print(f"Free Cash Flow: ${free_cash_flow/1e9:.1f}B")

Advanced Statement Customization

Period Views and Formatting

# Get available period views for income statement
period_views = statements.get_period_views("IncomeStatement")
print("Available period views:")
for view in period_views:
    print(f"- {view['name']}: {view['description']}")

# Render with specific period view
annual_comparison = statements.income_statement(period_view="Annual Comparison")
quarterly_comparison = statements.income_statement(period_view="Quarterly Comparison")

# Show full date ranges for duration periods
income_with_dates = statements.income_statement(show_date_range=True)

print("Income Statement with Date Ranges:")
print(income_with_dates)

Standardized vs Company-Specific Labels

# Use standardized labels for cross-company comparison (default)
standardized = statements.income_statement(standard=True)

# Use company-specific labels as reported in filing
company_specific = statements.income_statement(standard=False)

print("Standardized Labels:")
print(standardized.to_dataframe()['label'].head(10))

print("\nCompany-Specific Labels:")
print(company_specific.to_dataframe()['label'].head(10))

Cross-Company Analysis

Compare Multiple Companies

import pandas as pd

def get_key_metrics(ticker):
    """Extract key financial metrics for a company."""
    try:
        company = Company(ticker)
        financials = company.financials

        # Get statements
        bs = financials.balance_sheet
        inc = financials.income
        cf = financials.cash_flow

        # Extract key metrics
        return {
            'ticker': ticker,
            'revenue': inc.loc['Revenue'].iloc[0] if 'Revenue' in inc.index else None,
            'net_income': inc.loc['Net Income'].iloc[0] if 'Net Income' in inc.index else None,
            'total_assets': bs.loc['Total Assets'].iloc[0] if 'Total Assets' in bs.index else None,
            'operating_cf': cf.loc['Net Cash from Operating Activities'].iloc[0] if 'Net Cash from Operating Activities' in cf.index else None
        }
    except Exception as e:
        print(f"Error processing {ticker}: {e}")
        return None

# Analyze multiple companies
tech_companies = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META']
metrics = []

for ticker in tech_companies:
    result = get_key_metrics(ticker)
    if result:
        metrics.append(result)

# Create comparison DataFrame
comparison_df = pd.DataFrame(metrics)

# Convert to billions and calculate ratios
comparison_df['revenue_b'] = comparison_df['revenue'] / 1e9
comparison_df['net_income_b'] = comparison_df['net_income'] / 1e9
comparison_df['net_margin'] = (comparison_df['net_income'] / comparison_df['revenue']) * 100

print("Tech Giants Comparison:")
print(comparison_df[['ticker', 'revenue_b', 'net_income_b', 'net_margin']].round(1))

Advanced XBRL Features

Access Raw XBRL Facts

# Access the facts API for detailed XBRL data
facts = xbrl.facts

# Query facts by concept
revenue_facts = facts.query().by_concept('Revenue').to_dataframe()
print("Revenue facts across all periods:")
print(revenue_facts[['concept', 'label', 'period', 'value']])

# Search for specific concepts
earnings_facts = facts.search_facts("Earnings Per Share")
print("EPS-related facts:")
print(earnings_facts[['concept', 'label', 'value']])

# Get facts by statement type
balance_sheet_facts = facts.query().by_statement_type('BalanceSheet').to_dataframe()
print(f"Found {len(balance_sheet_facts)} balance sheet facts")

Time Series Analysis

# Get time series data for specific concepts
revenue_series = facts.time_series('Revenue')
net_income_series = facts.time_series('Net Income')

print("Revenue Time Series:")
print(revenue_series)

# Convert to DataFrame for analysis
import pandas as pd
ts_df = pd.DataFrame({
    'revenue': revenue_series,
    'net_income': net_income_series
})

# Calculate growth rates
ts_df['revenue_growth'] = ts_df['revenue'].pct_change() * 100
ts_df['income_growth'] = ts_df['net_income'].pct_change() * 100

print("Growth Analysis:")
print(ts_df[['revenue_growth', 'income_growth']].round(1))

Dimensional Analysis

# Query facts by dimensions (if available)
segment_facts = facts.query().by_dimension('Segment').to_dataframe()
if not segment_facts.empty:
    print("Segment-specific financial data:")
    print(segment_facts[['concept', 'label', 'dimension_value', 'value']].head())

# Get facts by geographic dimension
geographic_facts = facts.query().by_dimension('Geography').to_dataframe()
if not geographic_facts.empty:
    print("Geographic breakdown:")
    print(geographic_facts[['concept', 'dimension_value', 'value']].head())

Export and Integration

Export to Different Formats

# Export statements to various formats
income_statement = statements.income_statement()

# Export to pandas DataFrame
df = income_statement.to_dataframe()

# Export to markdown
markdown_text = income_statement.render().to_markdown()

# Save to CSV
df.to_csv('apple_income_statement.csv', index=False)

# Save markdown to file
with open('apple_income_statement.md', 'w') as f:
    f.write(markdown_text)

print("Statements exported to CSV and Markdown")

Integration with Analysis Libraries

import matplotlib.pyplot as plt
import seaborn as sns

# Get multi-period data
filings = company.get_filings(form="10-K").head(5)
multi_financials = MultiFinancials(filings)
income_df = multi_financials.income.to_dataframe()

# Extract revenue data for plotting
revenue_data = income_df[income_df['label'] == 'Revenue'].iloc[0, 1:].astype(float)
periods = revenue_data.index

# Create visualization
plt.figure(figsize=(10, 6))
plt.plot(periods, revenue_data / 1e9, marker='o', linewidth=2)
plt.title('Apple Revenue Trend (5 Years)')
plt.xlabel('Period')
plt.ylabel('Revenue (Billions USD)')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Calculate year-over-year growth
revenue_growth = revenue_data.pct_change() * 100
print("Year-over-Year Revenue Growth:")
for period, growth in revenue_growth.dropna().items():
    print(f"{period}: {growth:.1f}%")

Performance Optimization

Efficient Multi-Company Analysis

# Efficient batch processing
def batch_analyze_companies(tickers, max_workers=5):
    """Analyze multiple companies efficiently."""
    from concurrent.futures import ThreadPoolExecutor

    def analyze_single(ticker):
        try:
            company = Company(ticker)
            financials = company.financials
            return {
                'ticker': ticker,
                'revenue': financials.income.loc['Revenue'].iloc[0],
                'assets': financials.balance_sheet.loc['Total Assets'].iloc[0]
            }
        except Exception as e:
            return {'ticker': ticker, 'error': str(e)}

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(analyze_single, tickers))

    return [r for r in results if 'error' not in r]

# Analyze S&P 100 companies efficiently
sp100_sample = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'NVDA', 'JPM']
results = batch_analyze_companies(sp100_sample)

comparison_df = pd.DataFrame(results)
print("Batch Analysis Results:")
print(comparison_df.head())

Caching for Repeated Analysis

# Cache XBRL data for repeated use
company = Company("AAPL")
filing = company.get_filings(form="10-K").latest()

# Parse once, use multiple times
xbrl = filing.xbrl()

# Perform different analyses on same data
balance_sheet = xbrl.statements.balance_sheet()
income_statement = xbrl.statements.income_statement()
cash_flow = xbrl.statements.cashflow_statement()

# Access facts for custom queries
facts = xbrl.facts
revenue_facts = facts.query().by_concept('Revenue').to_dataframe()
margin_facts = facts.search_facts("margin")

Common Patterns and Best Practices

Robust Financial Metric Extraction

def safe_extract_metric(statement_df, label, column=-1, default=None):
    """Safely extract a metric from financial statement DataFrame."""
    try:
        rows = statement_df[statement_df['label'].str.contains(label, case=False, na=False)]
        if not rows.empty:
            return rows.iloc[0, column]
        return default
    except Exception:
        return default

# Use for robust metric extraction
income_df = statements.income_statement().to_dataframe()

revenue = safe_extract_metric(income_df, 'Revenue')
net_income = safe_extract_metric(income_df, 'Net Income')
operating_income = safe_extract_metric(income_df, 'Operating Income')

if revenue and net_income:
    net_margin = (net_income / revenue) * 100
    print(f"Net Margin: {net_margin:.1f}%")

Handle Missing or Inconsistent Data

def get_financial_metrics(company_ticker):
    """Get financial metrics with error handling."""
    try:
        company = Company(company_ticker)
        financials = company.financials

        metrics = {}

        # Try to get income statement metrics
        try:
            income = financials.income
            metrics['revenue'] = income.loc['Revenue'].iloc[0] if 'Revenue' in income.index else None
            metrics['net_income'] = income.loc['Net Income'].iloc[0] if 'Net Income' in income.index else None
        except Exception as e:
            print(f"Income statement error for {company_ticker}: {e}")

        # Try to get balance sheet metrics
        try:
            balance_sheet = financials.balance_sheet
            metrics['total_assets'] = balance_sheet.loc['Total Assets'].iloc[0] if 'Total Assets' in balance_sheet.index else None
        except Exception as e:
            print(f"Balance sheet error for {company_ticker}: {e}")

        return metrics

    except Exception as e:
        print(f"Company error for {company_ticker}: {e}")
        return {}

# Test with various companies
test_companies = ['AAPL', 'INVALID_TICKER', 'MSFT']
for ticker in test_companies:
    metrics = get_financial_metrics(ticker)
    if metrics:
        print(f"{ticker}: {metrics}")

Troubleshooting Common Issues

Statement Not Available

# Check what statements are available
try:
    statements = xbrl.statements
    available_statements = statements.available_statements()
    print(f"Available statements: {available_statements}")

    # Try alternative statement access
    if 'IncomeStatement' in available_statements:
        income = statements.income_statement()
    elif 'ComprehensiveIncome' in available_statements:
        income = statements['ComprehensiveIncome']
    else:
        print("No income statement available")

except Exception as e:
    print(f"Error accessing statements: {e}")

Period Selection Issues

# Check available periods
reporting_periods = xbrl.reporting_periods
print("Available reporting periods:")
for period in reporting_periods[:5]:  # Show first 5
    print(f"- {period['date']} ({period['type']}): {period.get('duration', 'N/A')} days")

# Handle quarterly vs annual periods
if any(p.get('duration', 0) < 120 for p in reporting_periods):
    print("Quarterly periods detected")
    quarterly_income = statements.income_statement(period_view="Quarterly Comparison")
else:
    print("Annual periods only")
    annual_income = statements.income_statement(period_view="Annual Comparison")

Next Steps

Now that you can extract financial statements, explore these advanced topics:

- Query XBRL Data - Advanced XBRL fact querying and analysis

Getting XBRL from Filings - Original XBRL documentation
Company Financials - Company financials API
XBRL API Reference - Complete XBRL class documentation