Skip to content

Extract Financial Statements

Learn how to extract and work with financial statements from SEC filings using EdgarTools' powerful XBRL processing capabilities.

Prerequisites

Quick Start: Single Period Statements

Get Latest Financial Statements

The fastest way to get financial statements is to call Company.get_financials():

from edgar import Company

# Get Apple's latest financials
company = Company("AAPL")
financials = company.get_financials()

# Access individual statements.
# Each accessor is called, so every variable holds a statement object
# rather than a reference to the accessor itself.
balance_sheet = financials.balance_sheet()
income_statement = financials.income_statement()
cash_flow = financials.cashflow_statement()

Alternative: From Specific Filing

For more control, extract statements from a specific filing:

from edgar import Company

# Get a specific filing
company = Company("AAPL")
filing = company.get_filings(form="10-K").latest()

# Parse XBRL data
xbrl = filing.xbrl()

# Access statements through the user-friendly API
statements = xbrl.statements

# Display financial statements
balance_sheet = statements.balance_sheet()
income_statement = statements.income_statement()
cash_flow = statements.cashflow_statement()

print(balance_sheet)  # Rich formatted output

Multi-Period Analysis

Method 1: Using MultiFinancials

Get financials across multiple years for trend analysis:

from edgar import Company, MultiFinancials

# Get multiple years of 10-K filings
company = Company("AAPL")
filings = company.get_filings(form="10-K").head(3)  # Last 3 annual reports

# Create multi-period financials
multi_financials = MultiFinancials(filings)

# Access statements spanning multiple years
balance_sheet = multi_financials.balance_sheet()
income_statement = multi_financials.income_statement()
cash_flow = multi_financials.cashflow_statement()

print("Multi-Year Income Statement:")
print(income_statement)

Method 2: Using XBRL Stitching

For more advanced multi-period analysis with intelligent period matching:

from edgar import Company
from edgar.xbrl import XBRLS

# Get multiple filings for trend analysis
company = Company("AAPL")
filings = company.get_filings(form="10-K").head(3)

# Create stitched view across multiple filings
xbrls = XBRLS.from_filings(filings)

# Access stitched statements
stitched_statements = xbrls.statements

# Display multi-period statements with intelligent period selection
income_trend = stitched_statements.income_statement()
balance_sheet_trend = stitched_statements.balance_sheet()
cashflow_trend = stitched_statements.cashflow_statement()

print("Three-Year Revenue Trend:")
revenue_trend = income_trend.to_dataframe()
revenue_row = revenue_trend.loc[revenue_trend['label'] == 'Revenue']
print(revenue_row)

Working with Individual Statements

Balance Sheet Analysis

# Get balance sheet
balance_sheet = statements.balance_sheet()

# Convert to DataFrame for analysis
bs_df = balance_sheet.to_dataframe()

# Extract key balance sheet items
total_assets = bs_df[bs_df['label'] == 'Total Assets']
total_liabilities = bs_df[bs_df['label'] == 'Total Liabilities']
shareholders_equity = bs_df[bs_df['label'] == "Total Stockholders' Equity"]

print("Balance Sheet Summary:")
print(f"Total Assets: ${total_assets.iloc[0, -1]/1e9:.1f}B")
print(f"Total Liabilities: ${total_liabilities.iloc[0, -1]/1e9:.1f}B")
print(f"Shareholders' Equity: ${shareholders_equity.iloc[0, -1]/1e9:.1f}B")

# Calculate debt-to-equity ratio
debt_to_equity = total_liabilities.iloc[0, -1] / shareholders_equity.iloc[0, -1]
print(f"Debt-to-Equity Ratio: {debt_to_equity:.2f}")

Income Statement Analysis

# Get income statement
income_statement = statements.income_statement()

# Convert to DataFrame
is_df = income_statement.to_dataframe()

# Extract key income statement items
revenue = is_df[is_df['label'] == 'Revenue']
gross_profit = is_df[is_df['label'] == 'Gross Profit']
operating_income = is_df[is_df['label'] == 'Operating Income']
net_income = is_df[is_df['label'] == 'Net Income']

print("Income Statement Analysis:")
print(f"Revenue: ${revenue.iloc[0, -1]/1e9:.1f}B")
print(f"Gross Profit: ${gross_profit.iloc[0, -1]/1e9:.1f}B")
print(f"Operating Income: ${operating_income.iloc[0, -1]/1e9:.1f}B")
print(f"Net Income: ${net_income.iloc[0, -1]/1e9:.1f}B")

# Calculate margins
gross_margin = (gross_profit.iloc[0, -1] / revenue.iloc[0, -1]) * 100
operating_margin = (operating_income.iloc[0, -1] / revenue.iloc[0, -1]) * 100
net_margin = (net_income.iloc[0, -1] / revenue.iloc[0, -1]) * 100

print(f"\nMargin Analysis:")
print(f"Gross Margin: {gross_margin:.1f}%")
print(f"Operating Margin: {operating_margin:.1f}%")
print(f"Net Margin: {net_margin:.1f}%")

Cash Flow Analysis

# Get cash flow statement
cash_flow = statements.cashflow_statement()

# Convert to DataFrame
cf_df = cash_flow.to_dataframe()

# Extract cash flow components
operating_cf = cf_df[cf_df['label'] == 'Net Cash from Operating Activities']
investing_cf = cf_df[cf_df['label'] == 'Net Cash from Investing Activities']
financing_cf = cf_df[cf_df['label'] == 'Net Cash from Financing Activities']

print("Cash Flow Analysis:")
print(f"Operating Cash Flow: ${operating_cf.iloc[0, -1]/1e9:.1f}B")
print(f"Investing Cash Flow: ${investing_cf.iloc[0, -1]/1e9:.1f}B")
print(f"Financing Cash Flow: ${financing_cf.iloc[0, -1]/1e9:.1f}B")

# Calculate free cash flow (Operating CF - Capital Expenditures)
capex = cf_df[cf_df['label'].str.contains('Capital Expenditures', case=False, na=False)]
if not capex.empty:
    free_cash_flow = operating_cf.iloc[0, -1] + capex.iloc[0, -1]  # CapEx is usually negative
    print(f"Free Cash Flow: ${free_cash_flow/1e9:.1f}B")

Advanced Statement Customization

Period Views and Formatting

# Get available period views for income statement
period_views = statements.get_period_views("IncomeStatement")
print("Available period views:")
for view in period_views:
    print(f"- {view['name']}: {view['description']}")

# Render with specific period view
annual_comparison = statements.income_statement(period_view="Annual Comparison")
quarterly_comparison = statements.income_statement(period_view="Quarterly Comparison")

# Show full date ranges for duration periods
income_with_dates = statements.income_statement(show_date_range=True)

print("Income Statement with Date Ranges:")
print(income_with_dates)

Standardized vs Company-Specific Labels

# Use standardized labels for cross-company comparison (default)
standardized = statements.income_statement(standard=True)

# Use company-specific labels as reported in filing
company_specific = statements.income_statement(standard=False)

print("Standardized Labels:")
print(standardized.to_dataframe()['label'].head(10))

print("\nCompany-Specific Labels:")
print(company_specific.to_dataframe()['label'].head(10))

Cross-Company Analysis

Compare Multiple Companies

import pandas as pd

def get_key_metrics(ticker):
    """Collect headline figures for a single ticker.

    Reads the ``balance_sheet``, ``income`` and ``cash_flow`` attributes of
    ``Company(ticker).financials`` and looks up one row per metric.

    Returns:
        A dict with the keys 'ticker', 'revenue', 'net_income',
        'total_assets' and 'operating_cf'. A metric is None when its row
        label is not in the statement's index. If anything raises, the error
        is printed and None is returned instead of a dict.
    """
    def first_value(frame, row_label):
        # First positional entry of the labelled row, or None when absent.
        if row_label in frame.index:
            return frame.loc[row_label].iloc[0]
        return None

    try:
        financials = Company(ticker).financials

        # Pull the three statements before reading any metric
        balance = financials.balance_sheet
        income = financials.income
        cash = financials.cash_flow

        summary = {'ticker': ticker}
        summary['revenue'] = first_value(income, 'Revenue')
        summary['net_income'] = first_value(income, 'Net Income')
        summary['total_assets'] = first_value(balance, 'Total Assets')
        summary['operating_cf'] = first_value(cash, 'Net Cash from Operating Activities')
        return summary
    except Exception as e:
        print(f"Error processing {ticker}: {e}")
        return None

# Analyze multiple companies
tech_companies = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META']
metrics = []

for ticker in tech_companies:
    result = get_key_metrics(ticker)
    if result:
        metrics.append(result)

# Create comparison DataFrame
comparison_df = pd.DataFrame(metrics)

# Convert to billions and calculate ratios
comparison_df['revenue_b'] = comparison_df['revenue'] / 1e9
comparison_df['net_income_b'] = comparison_df['net_income'] / 1e9
comparison_df['net_margin'] = (comparison_df['net_income'] / comparison_df['revenue']) * 100

print("Tech Giants Comparison:")
print(comparison_df[['ticker', 'revenue_b', 'net_income_b', 'net_margin']].round(1))

Advanced XBRL Features

Access Raw XBRL Facts

# Access the facts API for detailed XBRL data
facts = xbrl.facts

# Query facts by concept
revenue_facts = facts.query().by_concept('Revenue').to_dataframe()
print("Revenue facts across all periods:")
print(revenue_facts[['concept', 'label', 'period', 'value']])

# Search for specific concepts
earnings_facts = facts.search_facts("Earnings Per Share")
print("EPS-related facts:")
print(earnings_facts[['concept', 'label', 'value']])

# Get facts by statement type
balance_sheet_facts = facts.query().by_statement_type('BalanceSheet').to_dataframe()
print(f"Found {len(balance_sheet_facts)} balance sheet facts")

Time Series Analysis

# Get time series data for specific concepts
revenue_series = facts.time_series('Revenue')
net_income_series = facts.time_series('Net Income')

print("Revenue Time Series:")
print(revenue_series)

# Convert to DataFrame for analysis
import pandas as pd
ts_df = pd.DataFrame({
    'revenue': revenue_series,
    'net_income': net_income_series
})

# Calculate growth rates
ts_df['revenue_growth'] = ts_df['revenue'].pct_change() * 100
ts_df['income_growth'] = ts_df['net_income'].pct_change() * 100

print("Growth Analysis:")
print(ts_df[['revenue_growth', 'income_growth']].round(1))

Dimensional Analysis

# Query facts by dimensions (if available)
segment_facts = facts.query().by_dimension('Segment').to_dataframe()
if not segment_facts.empty:
    print("Segment-specific financial data:")
    print(segment_facts[['concept', 'label', 'dimension_value', 'value']].head())

# Get facts by geographic dimension
geographic_facts = facts.query().by_dimension('Geography').to_dataframe()
if not geographic_facts.empty:
    print("Geographic breakdown:")
    print(geographic_facts[['concept', 'dimension_value', 'value']].head())

Export and Integration

Export to Different Formats

# Export statements to various formats
income_statement = statements.income_statement()

# Export to pandas DataFrame
df = income_statement.to_dataframe()

# Export to markdown
markdown_text = income_statement.render().to_markdown()

# Save to CSV
df.to_csv('apple_income_statement.csv', index=False)

# Save markdown to file
with open('apple_income_statement.md', 'w') as f:
    f.write(markdown_text)

print("Statements exported to CSV and Markdown")

Integration with Analysis Libraries

import matplotlib.pyplot as plt
import seaborn as sns

# Get multi-period data
filings = company.get_filings(form="10-K").head(5)
multi_financials = MultiFinancials(filings)
# income_statement() is the accessor used for MultiFinancials elsewhere in this guide
income_df = multi_financials.income_statement().to_dataframe()

# Extract revenue data for plotting (first matching row, every column after the first)
revenue_data = income_df[income_df['label'] == 'Revenue'].iloc[0, 1:].astype(float)
periods = revenue_data.index

# Create visualization
plt.figure(figsize=(10, 6))
plt.plot(periods, revenue_data / 1e9, marker='o', linewidth=2)
plt.title('Apple Revenue Trend (5 Years)')
plt.xlabel('Period')
plt.ylabel('Revenue (Billions USD)')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Calculate year-over-year growth
# NOTE(review): pct_change() follows the column order of the DataFrame -
# confirm the periods are ordered oldest to newest before reading the signs.
revenue_growth = revenue_data.pct_change() * 100
print("Year-over-Year Revenue Growth:")
for period, growth in revenue_growth.dropna().items():
    print(f"{period}: {growth:.1f}%")

Performance Optimization

Efficient Multi-Company Analysis

# Efficient batch processing
def batch_analyze_companies(tickers, max_workers=5):
    """Analyze multiple companies concurrently and return the successes.

    Each ticker is processed on a worker thread: its ``financials`` are
    loaded and the first value of the 'Revenue' row (income) and the
    'Total Assets' row (balance sheet) are collected.

    Args:
        tickers: Iterable of ticker symbols to analyze.
        max_workers: Size of the thread pool (default 5).

    Returns:
        A list of dicts with the keys 'ticker', 'revenue' and 'assets', in
        input order, containing only the tickers that were processed without
        an exception. Failed tickers are reported on stdout and omitted.
    """
    from concurrent.futures import ThreadPoolExecutor

    def analyze_single(ticker):
        try:
            company = Company(ticker)
            financials = company.financials
            return {
                'ticker': ticker,
                'revenue': financials.income.loc['Revenue'].iloc[0],
                'assets': financials.balance_sheet.loc['Total Assets'].iloc[0]
            }
        except Exception as e:
            # Keep the failure as data so one bad ticker cannot abort the batch.
            return {'ticker': ticker, 'error': str(e)}

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(analyze_single, tickers))

    succeeded = []
    for result in results:
        if 'error' in result:
            # Report instead of dropping silently, so a missing ticker in the
            # output can be explained.
            print(f"Error processing {result['ticker']}: {result['error']}")
        else:
            succeeded.append(result)
    return succeeded

# Analyze S&P 100 companies efficiently
sp100_sample = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'NVDA', 'JPM']
results = batch_analyze_companies(sp100_sample)

comparison_df = pd.DataFrame(results)
print("Batch Analysis Results:")
print(comparison_df.head())

Caching for Repeated Analysis

# Cache XBRL data for repeated use
company = Company("AAPL")
filing = company.get_filings(form="10-K").latest()

# Parse once, use multiple times
xbrl = filing.xbrl()

# Perform different analyses on same data
balance_sheet = xbrl.statements.balance_sheet()
income_statement = xbrl.statements.income_statement()
cash_flow = xbrl.statements.cashflow_statement()

# Access facts for custom queries
facts = xbrl.facts
revenue_facts = facts.query().by_concept('Revenue').to_dataframe()
margin_facts = facts.search_facts("margin")

Common Patterns and Best Practices

Robust Financial Metric Extraction

def safe_extract_metric(statement_df, label, column=-1, default=None):
    """Safely extract a metric from a financial statement DataFrame.

    Finds the first row whose 'label' value contains ``label`` as a literal,
    case-insensitive substring and returns the cell at positional ``column``
    of that row.

    Args:
        statement_df: DataFrame with a 'label' column of text labels.
        label: Text to look for inside the 'label' column.
        column: Positional column index of the value to return
            (default -1, the last column).
        default: Value returned when nothing matches or the lookup fails.

    Returns:
        The matching cell value, or ``default``.
    """
    try:
        # regex=False: labels are plain text and often contain regex
        # metacharacters, e.g. the parentheses in "Net Income (Loss)".
        matches = statement_df['label'].str.contains(
            label, case=False, na=False, regex=False
        )
        rows = statement_df[matches]
        if not rows.empty:
            return rows.iloc[0, column]
        return default
    except Exception:
        # Deliberate best-effort: a missing 'label' column, a non-text column
        # or an out-of-range position all fall back to the default.
        return default

# Use for robust metric extraction
income_df = statements.income_statement().to_dataframe()

revenue = safe_extract_metric(income_df, 'Revenue')
net_income = safe_extract_metric(income_df, 'Net Income')
operating_income = safe_extract_metric(income_df, 'Operating Income')

if revenue and net_income:
    net_margin = (net_income / revenue) * 100
    print(f"Net Margin: {net_margin:.1f}%")

Handle Missing or Inconsistent Data

def get_financial_metrics(company_ticker):
    """Gather income and balance-sheet metrics, tolerating partial failures.

    The income statement and the balance sheet are read independently: an
    exception in one is printed and leaves its keys out of the result
    without affecting the other.

    Returns:
        A dict that may contain 'revenue', 'net_income' and 'total_assets'
        (each None when the row label is not in the statement's index).
        An empty dict is returned when the company itself cannot be loaded.
    """
    def first_value(frame, row_label):
        # First positional entry of the labelled row, or None when absent.
        if row_label in frame.index:
            return frame.loc[row_label].iloc[0]
        return None

    try:
        financials = Company(company_ticker).financials
        collected = {}

        # Income statement metrics
        try:
            income = financials.income
            collected['revenue'] = first_value(income, 'Revenue')
            collected['net_income'] = first_value(income, 'Net Income')
        except Exception as e:
            print(f"Income statement error for {company_ticker}: {e}")

        # Balance sheet metrics
        try:
            balance_sheet = financials.balance_sheet
            collected['total_assets'] = first_value(balance_sheet, 'Total Assets')
        except Exception as e:
            print(f"Balance sheet error for {company_ticker}: {e}")

        return collected

    except Exception as e:
        print(f"Company error for {company_ticker}: {e}")
        return {}

# Test with various companies
test_companies = ['AAPL', 'INVALID_TICKER', 'MSFT']
for ticker in test_companies:
    metrics = get_financial_metrics(ticker)
    if metrics:
        print(f"{ticker}: {metrics}")

Troubleshooting Common Issues

Statement Not Available

# Check what statements are available
try:
    statements = xbrl.statements
    available_statements = statements.available_statements()
    print(f"Available statements: {available_statements}")

    # Try alternative statement access
    if 'IncomeStatement' in available_statements:
        income = statements.income_statement()
    elif 'ComprehensiveIncome' in available_statements:
        income = statements['ComprehensiveIncome']
    else:
        print("No income statement available")

except Exception as e:
    print(f"Error accessing statements: {e}")

Period Selection Issues

# Check available periods
reporting_periods = xbrl.reporting_periods
print("Available reporting periods:")
for period in reporting_periods[:5]:  # Show first 5
    print(f"- {period['date']} ({period['type']}): {period.get('duration', 'N/A')} days")

# Handle quarterly vs annual periods.
# Only periods that actually carry a duration are considered: defaulting a
# missing duration to 0 would classify every such period as "quarterly".
durations = [p['duration'] for p in reporting_periods if p.get('duration') is not None]
if any(days < 120 for days in durations):
    print("Quarterly periods detected")
    quarterly_income = statements.income_statement(period_view="Quarterly Comparison")
else:
    print("Annual periods only")
    annual_income = statements.income_statement(period_view="Annual Comparison")

Next Steps

Now that you can extract financial statements, explore these advanced topics:

- Query XBRL Data - Advanced XBRL fact querying and analysis