Extract Financial Statements
Learn how to extract and work with financial statements from SEC filings using EdgarTools' powerful XBRL processing capabilities.
Prerequisites
- Basic understanding of financial statements (balance sheet, income statement, cash flow)
- Familiarity with finding companies and searching filings
Quick Start: Single Period Statements
Get Latest Financial Statements
The fastest way to get financial statements is using the Company.financials property:
from edgar import Company
# Get Apple's latest financials
company = Company("AAPL")
financials = company.get_financials()
# Access individual statements
balance_sheet = financials.balance_sheet
income_statement = financials.income_statement()
cash_flow = financials.cashflow_statement()
Alternative: From Specific Filing
For more control, extract statements from a specific filing:
from edgar import Company
# Get a specific filing
company = Company("AAPL")
filing = company.get_filings(form="10-K").latest()
# Parse XBRL data
xbrl = filing.xbrl()
# Access statements through the user-friendly API
statements = xbrl.statements
# Display financial statements
balance_sheet = statements.balance_sheet()
income_statement = statements.income_statement()
cash_flow = statements.cashflow_statement()
print(balance_sheet) # Rich formatted output
Multi-Period Analysis
Method 1: Using MultiFinancials
Get financials across multiple years for trend analysis:
from edgar import Company, MultiFinancials
# Get multiple years of 10-K filings
company = Company("AAPL")
filings = company.get_filings(form="10-K").head(3) # Last 3 annual reports
# Create multi-period financials
multi_financials = MultiFinancials(filings)
# Access statements spanning multiple years
balance_sheet = multi_financials.balance_sheet()
income_statement = multi_financials.income_statement()
cash_flow = multi_financials.cashflow_statement()
print("Multi-Year Income Statement:")
print(income_statement)
Method 2: Using XBRL Stitching
For more advanced multi-period analysis with intelligent period matching:
from edgar import Company
from edgar.xbrl import XBRLS
# Get multiple filings for trend analysis
company = Company("AAPL")
filings = company.get_filings(form="10-K").head(3)
# Create stitched view across multiple filings
xbrls = XBRLS.from_filings(filings)
# Access stitched statements
stitched_statements = xbrls.statements
# Display multi-period statements with intelligent period selection
income_trend = stitched_statements.income_statement()
balance_sheet_trend = stitched_statements.balance_sheet()
cashflow_trend = stitched_statements.cashflow_statement()
print("Three-Year Revenue Trend:")
revenue_trend = income_trend.to_dataframe()
revenue_row = revenue_trend.loc[revenue_trend['label'] == 'Revenue']
print(revenue_row)
Working with Individual Statements
Balance Sheet Analysis
# Get balance sheet
balance_sheet = statements.balance_sheet()
# Convert to DataFrame for analysis
bs_df = balance_sheet.to_dataframe()
# Extract key balance sheet items
total_assets = bs_df[bs_df['label'] == 'Total Assets']
total_liabilities = bs_df[bs_df['label'] == 'Total Liabilities']
shareholders_equity = bs_df[bs_df['label'] == "Total Stockholders' Equity"]
print("Balance Sheet Summary:")
print(f"Total Assets: ${total_assets.iloc[0, -1]/1e9:.1f}B")
print(f"Total Liabilities: ${total_liabilities.iloc[0, -1]/1e9:.1f}B")
print(f"Shareholders' Equity: ${shareholders_equity.iloc[0, -1]/1e9:.1f}B")
# Calculate debt-to-equity ratio
debt_to_equity = total_liabilities.iloc[0, -1] / shareholders_equity.iloc[0, -1]
print(f"Debt-to-Equity Ratio: {debt_to_equity:.2f}")
Income Statement Analysis
# Get income statement
income_statement = statements.income_statement()
# Convert to DataFrame
is_df = income_statement.to_dataframe()
# Extract key income statement items
revenue = is_df[is_df['label'] == 'Revenue']
gross_profit = is_df[is_df['label'] == 'Gross Profit']
operating_income = is_df[is_df['label'] == 'Operating Income']
net_income = is_df[is_df['label'] == 'Net Income']
print("Income Statement Analysis:")
print(f"Revenue: ${revenue.iloc[0, -1]/1e9:.1f}B")
print(f"Gross Profit: ${gross_profit.iloc[0, -1]/1e9:.1f}B")
print(f"Operating Income: ${operating_income.iloc[0, -1]/1e9:.1f}B")
print(f"Net Income: ${net_income.iloc[0, -1]/1e9:.1f}B")
# Calculate margins
gross_margin = (gross_profit.iloc[0, -1] / revenue.iloc[0, -1]) * 100
operating_margin = (operating_income.iloc[0, -1] / revenue.iloc[0, -1]) * 100
net_margin = (net_income.iloc[0, -1] / revenue.iloc[0, -1]) * 100
print(f"\nMargin Analysis:")
print(f"Gross Margin: {gross_margin:.1f}%")
print(f"Operating Margin: {operating_margin:.1f}%")
print(f"Net Margin: {net_margin:.1f}%")
Cash Flow Analysis
# Get cash flow statement
cash_flow = statements.cashflow_statement()
# Convert to DataFrame
cf_df = cash_flow.to_dataframe()
# Extract cash flow components
operating_cf = cf_df[cf_df['label'] == 'Net Cash from Operating Activities']
investing_cf = cf_df[cf_df['label'] == 'Net Cash from Investing Activities']
financing_cf = cf_df[cf_df['label'] == 'Net Cash from Financing Activities']
print("Cash Flow Analysis:")
print(f"Operating Cash Flow: ${operating_cf.iloc[0, -1]/1e9:.1f}B")
print(f"Investing Cash Flow: ${investing_cf.iloc[0, -1]/1e9:.1f}B")
print(f"Financing Cash Flow: ${financing_cf.iloc[0, -1]/1e9:.1f}B")
# Calculate free cash flow (Operating CF - Capital Expenditures)
capex = cf_df[cf_df['label'].str.contains('Capital Expenditures', case=False, na=False)]
if not capex.empty:
free_cash_flow = operating_cf.iloc[0, -1] + capex.iloc[0, -1] # CapEx is usually negative
print(f"Free Cash Flow: ${free_cash_flow/1e9:.1f}B")
Advanced Statement Customization
Period Views and Formatting
# Get available period views for income statement
period_views = statements.get_period_views("IncomeStatement")
print("Available period views:")
for view in period_views:
print(f"- {view['name']}: {view['description']}")
# Render with specific period view
annual_comparison = statements.income_statement(period_view="Annual Comparison")
quarterly_comparison = statements.income_statement(period_view="Quarterly Comparison")
# Show full date ranges for duration periods
income_with_dates = statements.income_statement(show_date_range=True)
print("Income Statement with Date Ranges:")
print(income_with_dates)
Standardized vs Company-Specific Labels
# Use standardized labels for cross-company comparison (default)
standardized = statements.income_statement(standard=True)
# Use company-specific labels as reported in filing
company_specific = statements.income_statement(standard=False)
print("Standardized Labels:")
print(standardized.to_dataframe()['label'].head(10))
print("\nCompany-Specific Labels:")
print(company_specific.to_dataframe()['label'].head(10))
Cross-Company Analysis
Compare Multiple Companies
import pandas as pd
def get_key_metrics(ticker):
"""Extract key financial metrics for a company."""
try:
company = Company(ticker)
financials = company.financials
# Get statements
bs = financials.balance_sheet
inc = financials.income
cf = financials.cash_flow
# Extract key metrics
return {
'ticker': ticker,
'revenue': inc.loc['Revenue'].iloc[0] if 'Revenue' in inc.index else None,
'net_income': inc.loc['Net Income'].iloc[0] if 'Net Income' in inc.index else None,
'total_assets': bs.loc['Total Assets'].iloc[0] if 'Total Assets' in bs.index else None,
'operating_cf': cf.loc['Net Cash from Operating Activities'].iloc[0] if 'Net Cash from Operating Activities' in cf.index else None
}
except Exception as e:
print(f"Error processing {ticker}: {e}")
return None
# Analyze multiple companies
tech_companies = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META']
metrics = []
for ticker in tech_companies:
result = get_key_metrics(ticker)
if result:
metrics.append(result)
# Create comparison DataFrame
comparison_df = pd.DataFrame(metrics)
# Convert to billions and calculate ratios
comparison_df['revenue_b'] = comparison_df['revenue'] / 1e9
comparison_df['net_income_b'] = comparison_df['net_income'] / 1e9
comparison_df['net_margin'] = (comparison_df['net_income'] / comparison_df['revenue']) * 100
print("Tech Giants Comparison:")
print(comparison_df[['ticker', 'revenue_b', 'net_income_b', 'net_margin']].round(1))
Advanced XBRL Features
Access Raw XBRL Facts
# Access the facts API for detailed XBRL data
facts = xbrl.facts
# Query facts by concept
revenue_facts = facts.query().by_concept('Revenue').to_dataframe()
print("Revenue facts across all periods:")
print(revenue_facts[['concept', 'label', 'period', 'value']])
# Search for specific concepts
earnings_facts = facts.search_facts("Earnings Per Share")
print("EPS-related facts:")
print(earnings_facts[['concept', 'label', 'value']])
# Get facts by statement type
balance_sheet_facts = facts.query().by_statement_type('BalanceSheet').to_dataframe()
print(f"Found {len(balance_sheet_facts)} balance sheet facts")
Time Series Analysis
# Get time series data for specific concepts
revenue_series = facts.time_series('Revenue')
net_income_series = facts.time_series('Net Income')
print("Revenue Time Series:")
print(revenue_series)
# Convert to DataFrame for analysis
import pandas as pd
ts_df = pd.DataFrame({
'revenue': revenue_series,
'net_income': net_income_series
})
# Calculate growth rates
ts_df['revenue_growth'] = ts_df['revenue'].pct_change() * 100
ts_df['income_growth'] = ts_df['net_income'].pct_change() * 100
print("Growth Analysis:")
print(ts_df[['revenue_growth', 'income_growth']].round(1))
Dimensional Analysis
# Query facts by dimensions (if available)
segment_facts = facts.query().by_dimension('Segment').to_dataframe()
if not segment_facts.empty:
print("Segment-specific financial data:")
print(segment_facts[['concept', 'label', 'dimension_value', 'value']].head())
# Get facts by geographic dimension
geographic_facts = facts.query().by_dimension('Geography').to_dataframe()
if not geographic_facts.empty:
print("Geographic breakdown:")
print(geographic_facts[['concept', 'dimension_value', 'value']].head())
Export and Integration
Export to Different Formats
# Export statements to various formats
income_statement = statements.income_statement()
# Export to pandas DataFrame
df = income_statement.to_dataframe()
# Export to markdown
markdown_text = income_statement.render().to_markdown()
# Save to CSV
df.to_csv('apple_income_statement.csv', index=False)
# Save markdown to file
with open('apple_income_statement.md', 'w') as f:
f.write(markdown_text)
print("Statements exported to CSV and Markdown")
Integration with Analysis Libraries
import matplotlib.pyplot as plt
import seaborn as sns
# Get multi-period data
filings = company.get_filings(form="10-K").head(5)
multi_financials = MultiFinancials(filings)
income_df = multi_financials.income.to_dataframe()
# Extract revenue data for plotting
revenue_data = income_df[income_df['label'] == 'Revenue'].iloc[0, 1:].astype(float)
periods = revenue_data.index
# Create visualization
plt.figure(figsize=(10, 6))
plt.plot(periods, revenue_data / 1e9, marker='o', linewidth=2)
plt.title('Apple Revenue Trend (5 Years)')
plt.xlabel('Period')
plt.ylabel('Revenue (Billions USD)')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Calculate year-over-year growth
revenue_growth = revenue_data.pct_change() * 100
print("Year-over-Year Revenue Growth:")
for period, growth in revenue_growth.dropna().items():
print(f"{period}: {growth:.1f}%")
Performance Optimization
Efficient Multi-Company Analysis
# Efficient batch processing
def batch_analyze_companies(tickers, max_workers=5):
"""Analyze multiple companies efficiently."""
from concurrent.futures import ThreadPoolExecutor
def analyze_single(ticker):
try:
company = Company(ticker)
financials = company.financials
return {
'ticker': ticker,
'revenue': financials.income.loc['Revenue'].iloc[0],
'assets': financials.balance_sheet.loc['Total Assets'].iloc[0]
}
except Exception as e:
return {'ticker': ticker, 'error': str(e)}
with ThreadPoolExecutor(max_workers=max_workers) as executor:
results = list(executor.map(analyze_single, tickers))
return [r for r in results if 'error' not in r]
# Analyze S&P 100 companies efficiently
sp100_sample = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'NVDA', 'JPM']
results = batch_analyze_companies(sp100_sample)
comparison_df = pd.DataFrame(results)
print("Batch Analysis Results:")
print(comparison_df.head())
Caching for Repeated Analysis
# Cache XBRL data for repeated use
company = Company("AAPL")
filing = company.get_filings(form="10-K").latest()
# Parse once, use multiple times
xbrl = filing.xbrl()
# Perform different analyses on same data
balance_sheet = xbrl.statements.balance_sheet()
income_statement = xbrl.statements.income_statement()
cash_flow = xbrl.statements.cashflow_statement()
# Access facts for custom queries
facts = xbrl.facts
revenue_facts = facts.query().by_concept('Revenue').to_dataframe()
margin_facts = facts.search_facts("margin")
Common Patterns and Best Practices
Robust Financial Metric Extraction
def safe_extract_metric(statement_df, label, column=-1, default=None):
"""Safely extract a metric from financial statement DataFrame."""
try:
rows = statement_df[statement_df['label'].str.contains(label, case=False, na=False)]
if not rows.empty:
return rows.iloc[0, column]
return default
except Exception:
return default
# Use for robust metric extraction
income_df = statements.income_statement().to_dataframe()
revenue = safe_extract_metric(income_df, 'Revenue')
net_income = safe_extract_metric(income_df, 'Net Income')
operating_income = safe_extract_metric(income_df, 'Operating Income')
if revenue and net_income:
net_margin = (net_income / revenue) * 100
print(f"Net Margin: {net_margin:.1f}%")
Handle Missing or Inconsistent Data
def get_financial_metrics(company_ticker):
"""Get financial metrics with error handling."""
try:
company = Company(company_ticker)
financials = company.financials
metrics = {}
# Try to get income statement metrics
try:
income = financials.income
metrics['revenue'] = income.loc['Revenue'].iloc[0] if 'Revenue' in income.index else None
metrics['net_income'] = income.loc['Net Income'].iloc[0] if 'Net Income' in income.index else None
except Exception as e:
print(f"Income statement error for {company_ticker}: {e}")
# Try to get balance sheet metrics
try:
balance_sheet = financials.balance_sheet
metrics['total_assets'] = balance_sheet.loc['Total Assets'].iloc[0] if 'Total Assets' in balance_sheet.index else None
except Exception as e:
print(f"Balance sheet error for {company_ticker}: {e}")
return metrics
except Exception as e:
print(f"Company error for {company_ticker}: {e}")
return {}
# Test with various companies
test_companies = ['AAPL', 'INVALID_TICKER', 'MSFT']
for ticker in test_companies:
metrics = get_financial_metrics(ticker)
if metrics:
print(f"{ticker}: {metrics}")
Troubleshooting Common Issues
Statement Not Available
# Check what statements are available
try:
statements = xbrl.statements
available_statements = statements.available_statements()
print(f"Available statements: {available_statements}")
# Try alternative statement access
if 'IncomeStatement' in available_statements:
income = statements.income_statement()
elif 'ComprehensiveIncome' in available_statements:
income = statements['ComprehensiveIncome']
else:
print("No income statement available")
except Exception as e:
print(f"Error accessing statements: {e}")
Period Selection Issues
# Check available periods
reporting_periods = xbrl.reporting_periods
print("Available reporting periods:")
for period in reporting_periods[:5]: # Show first 5
print(f"- {period['date']} ({period['type']}): {period.get('duration', 'N/A')} days")
# Handle quarterly vs annual periods
if any(p.get('duration', 0) < 120 for p in reporting_periods):
print("Quarterly periods detected")
quarterly_income = statements.income_statement(period_view="Quarterly Comparison")
else:
print("Annual periods only")
annual_income = statements.income_statement(period_view="Annual Comparison")
Next Steps
Now that you can extract financial statements, explore these advanced topics:
- Query XBRL Data - Advanced XBRL fact querying and analysis
Related Documentation
- Getting XBRL from Filings - Original XBRL documentation
- Company Financials - Company financials API
- XBRL API Reference - Complete XBRL class documentation