Skip to main content

Real Estate Data Parsing Guide

Best practices for working with PropAPIS real estate data.

Data Structure

Property Object

Standard property data structure:

# Example property record; every value below is illustrative sample data.
{
'address': '123 Main St, Austin, TX 78701',
'price': 450000,
'bedrooms': 3,
'bathrooms': 2.5,
'sqft': 2100,
'lot_size': 0.25, # acres
'year_built': 1995,
'property_type': 'Single Family',
'zestimate': 465000,
'rent_zestimate': 2500,
'status': 'active',
'days_on_market': 15,
# 'url1'/'url2' are placeholder strings; `...` stands in for further entries.
'photos': ['url1', 'url2', ...],
'coordinates': {'lat': 30.267, 'lng': -97.743}
}

Error Handling

Handling Missing Data

Always check for None values:

prop = api.platforms.zillow.get_property(address)

# Safe access with defaults. Compare against None explicitly so that
# "field is missing" is not confused with a legitimate falsy value.
bedrooms = prop.bedrooms if prop.bedrooms is not None else 0
price = prop.price if prop.price is not None else 0

# Or use getattr with a default. The getattr default is only used when the
# attribute does not exist at all, so `or 0` additionally covers an attribute
# that is present but set to None.
sqft = getattr(prop, 'sqft', None) or 0

Try-Except Patterns

Handle API errors gracefully:

import time

from propapis import PropAPIS, APIError

api = PropAPIS(api_key='your_api_key')

# `prop` stays None when the lookup ultimately fails, so later code can test it
# instead of hitting an unbound name.
prop = None

# Two passes at most: the initial call plus one retry after a rate-limit wait.
for attempt in range(2):
    try:
        prop = api.platforms.zillow.get_property(address)
        break
    except APIError as e:
        if e.status_code == 404:
            print(f"Property not found: {address}")
            break
        elif e.status_code == 429 and attempt == 0:
            print("Rate limit exceeded, retrying...")
            time.sleep(60)
        else:
            # Any other API error, or a second 429 after the wait.
            print(f"API error: {e}")
            break

Retry Logic

Implement exponential backoff:

import time

def get_property_with_retry(address, max_retries=3):
    """Fetch a property, retrying failed calls with exponential backoff.

    Args:
        address: Address passed through to ``get_property``.
        max_retries: Total number of attempts (the first call included);
            must be at least 1.

    Returns:
        Whatever ``api.platforms.zillow.get_property`` returns.

    Raises:
        ValueError: If ``max_retries`` is less than 1. Previously such a
            value made the function return None without calling the API.
        APIError: The last error once all attempts are used up, or
            immediately for a 404 response.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            return api.platforms.zillow.get_property(address)
        except APIError as e:
            out_of_attempts = attempt == max_retries - 1
            # A 404 means the property was not found; repeating the same
            # request will not change that, so do not waste time backing off.
            if out_of_attempts or getattr(e, 'status_code', None) == 404:
                raise
            # Waits 1s, 2s, 4s, ... between attempts; with the default of
            # 3 attempts only the 1s and 2s waits occur.
            time.sleep(2 ** attempt)

Data Validation

Validate Property Data

Check data quality before using:

def validate_property(prop):
    """Check a property's price, bedrooms and sqft, printing any problems.

    Args:
        prop: Object exposing ``price``, ``bedrooms``, ``sqft`` and
            ``address`` attributes.

    Returns:
        True when no problem was found, otherwise False (after printing one
        line per problem).
    """
    errors = []

    if not prop.price or prop.price <= 0:
        errors.append("Invalid price")

    # 0 is a plausible bedroom count (e.g. a studio), so only a missing or
    # negative value is rejected; a truthiness test would reject 0 as well.
    if prop.bedrooms is None or prop.bedrooms < 0:
        errors.append("Invalid bedrooms")

    # Compare against None rather than truthiness so that a sqft of 0 is
    # flagged too; a missing sqft is still tolerated.
    if prop.sqft is not None and prop.sqft < 100:
        errors.append("Suspiciously small sqft")

    if errors:
        print(f"Validation errors for {prop.address}:")
        for error in errors:
            print(f" - {error}")
        return False

    return True

# Use validation: only continue with records that pass the checks.
prop = api.platforms.zillow.get_property(address)
if validate_property(prop):
    # Process property
    pass

Data Cleaning

Clean and normalize data:

def clean_property_data(prop):
    """Build a normalized dict from a property object.

    Args:
        prop: Object exposing ``address``, ``price``, ``bedrooms``,
            ``bathrooms`` and ``sqft`` attributes.

    Returns:
        Dict with the stripped address and numeric fields coerced to
        int/float. Missing values (None or an empty string) become None; a
        real 0 is kept as 0. ``price_per_sqft`` is added only when both price
        and sqft are non-zero.
    """
    def _convert(value, caster):
        # Only None / '' mean "missing"; 0 must survive (e.g. 0 bedrooms).
        if value is None or value == '':
            return None
        return caster(value)

    cleaned = {
        # Guard the strip so a missing address does not raise AttributeError.
        'address': prop.address.strip() if prop.address else prop.address,
        'price': _convert(prop.price, int),
        'bedrooms': _convert(prop.bedrooms, int),
        'bathrooms': _convert(prop.bathrooms, float),
        'sqft': _convert(prop.sqft, int),
    }

    # Calculate derived fields; the truthiness test also prevents a division
    # by zero when sqft is 0.
    if cleaned['price'] and cleaned['sqft']:
        cleaned['price_per_sqft'] = cleaned['price'] / cleaned['sqft']

    return cleaned

Batch Processing

Process Multiple Properties

Efficiently process large datasets:

def process_properties_batch(addresses):
    """Fetch, validate and clean each address in turn.

    Args:
        addresses: Sequence of address strings (``len()`` is used for the
            progress message).

    Returns:
        List of cleaned dicts for the properties that passed validation.
        Addresses that raise or fail validation are left out.
    """
    results = []
    total = len(addresses)

    for count, address in enumerate(addresses, start=1):
        try:
            prop = api.platforms.zillow.get_property(address)

            if validate_property(prop):
                results.append(clean_property_data(prop))
        except Exception as e:
            # Per-item boundary: one bad address must not abort the batch.
            print(f"Error processing {address}: {e}")

        # Progress indicator. It sits outside the try so failed addresses
        # count towards progress as well.
        if count % 10 == 0:
            print(f"Processed {count}/{total}")

    return results

# Process list (`...` is a placeholder for the remaining addresses)
addresses = ['123 Main St, Austin, TX', ...]
results = process_properties_batch(addresses)

Parallel Processing

Speed up with concurrent requests:

from concurrent.futures import ThreadPoolExecutor

def fetch_property(address):
    """Fetch one property, returning None instead of raising on failure.

    Args:
        address: Address passed through to ``get_property``.

    Returns:
        The fetched property, or None when any exception occurred (the
        error is printed).
    """
    try:
        return api.platforms.zillow.get_property(address)
    except Exception as e:
        # Best-effort by design: callers filter out the None results.
        print(f"Error: {address} - {e}")
        return None

# Process in parallel (`...` is a placeholder for the remaining addresses)
addresses = ['addr1', 'addr2', 'addr3', ...]

# At most 5 requests are in flight at once; executor.map yields results in
# the same order as `addresses`.
with ThreadPoolExecutor(max_workers=5) as executor:
    properties = list(executor.map(fetch_property, addresses))

# Filter out None values (fetch_property returns None for failed lookups)
properties = [p for p in properties if p is not None]

Data Export

Export to CSV

import csv

def export_to_csv(properties, filename):
    """Write selected property attributes to a CSV file with a header row.

    Args:
        properties: Iterable of objects carrying the exported attributes.
        filename: Path of the file to create or overwrite.

    An attribute that is missing or None produces an empty cell instead of
    aborting the export.
    """
    fieldnames = [
        'address', 'price', 'bedrooms', 'bathrooms',
        'sqft', 'zestimate', 'rent_zestimate',
    ]

    # Explicit UTF-8 so output does not depend on the platform's default
    # encoding; newline='' is what the csv module expects for its files.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)

        writer.writeheader()
        writer.writerows(
            {name: getattr(prop, name, None) for name in fieldnames}
            for prop in properties
        )

# Export: writes properties.csv, resolved relative to the current working directory
export_to_csv(properties, 'properties.csv')

Export to JSON

import json

def export_to_json(properties, filename):
    """Write the cleaned form of each property to a JSON file.

    Args:
        properties: Iterable of property objects accepted by
            ``clean_property_data``.
        filename: Path of the file to create or overwrite.
    """
    data = [clean_property_data(p) for p in properties]

    # Explicit encoding so the file does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)

# Export: writes properties.json, resolved relative to the current working directory
export_to_json(properties, 'properties.json')

Performance Optimization

Caching

Cache frequently accessed data:

from functools import lru_cache
import hashlib

@lru_cache(maxsize=1000)
def get_property_cached(address):
    """Fetch a property, memoizing results for up to 1000 distinct addresses.

    The cache is keyed on the exact ``address`` string, so differently
    spelled or spaced variants of one address are cached separately. A
    cached result is reused until it is evicted or
    ``get_property_cached.cache_clear()`` is called. Calls that raise are
    not cached.
    """
    return api.platforms.zillow.get_property(address)

# Use cached version: repeating this exact address string is served from the cache
prop = get_property_cached('123 Main St, Austin, TX')

Rate Limit Management

Respect rate limits:

import time

def rate_limited_requests(addresses, requests_per_minute=100):
    """Yield one property per address, pausing between API calls.

    Args:
        addresses: Iterable of address strings.
        requests_per_minute: Upper bound on the request rate; must be
            positive.

    Yields:
        The result of ``get_property`` for each address, in input order.

    Raises:
        ValueError: If ``requests_per_minute`` is not positive. Because this
            is a generator, the error surfaces when iteration starts.
    """
    if requests_per_minute <= 0:
        raise ValueError("requests_per_minute must be positive")

    delay = 60.0 / requests_per_minute

    for index, address in enumerate(addresses):
        if index:
            # Pause before every request except the first, so no time is
            # wasted sleeping after the final item.
            time.sleep(delay)
        yield api.platforms.zillow.get_property(address)

# Use generator: each property is fetched lazily as the loop asks for it
for prop in rate_limited_requests(addresses):
    process(prop)

Quick Start

from propapis import PropAPIS

api = PropAPIS(api_key='your_api_key')

# Basic usage with error handling
try:
    prop = api.platforms.zillow.get_property('123 Main St, Austin, TX')

    # Validate data. Bedrooms is compared against None so that a count of 0
    # is still treated as present.
    if prop.price and prop.bedrooms is not None:
        print(f"{prop.address} - ${prop.price:,}")
    else:
        print("Incomplete data")

except Exception as e:
    # Top-level boundary of the example: report the failure and carry on.
    print(f"Error: {e}")

For complete API documentation, see our API Reference.