Real Estate Data Parsing Guide
Best practices for working with PropAPIS real estate data.
Data Structure
Property Object
Standard property data structure:
# Example property record; the values below are illustrative only.
{
'address': '123 Main St, Austin, TX 78701',
'price': 450000,
'bedrooms': 3,
'bathrooms': 2.5, # may be fractional
'sqft': 2100,
'lot_size': 0.25, # acres
'year_built': 1995,
'property_type': 'Single Family',
'zestimate': 465000,
'rent_zestimate': 2500,
'status': 'active',
'days_on_market': 15,
'photos': ['url1', 'url2', ...], # `...` is a placeholder for further URL strings
'coordinates': {'lat': 30.267, 'lng': -97.743}
}
Error Handling
Handling Missing Data
Any field may be missing or None, so always check before using a value:
# Fetch the property; individual fields on the result may be None.
prop = api.platforms.zillow.get_property(address)

# Safe access with defaults: fall back to 0 when the field is None (or 0).
bedrooms = prop.bedrooms or 0
price = prop.price or 0

# getattr's default only covers an attribute that is absent, not one that is
# present but set to None, so apply the fallback a second time with `or`.
sqft = getattr(prop, 'sqft', None) or 0
Try-Except Patterns
Handle API errors gracefully:
import time  # needed for the back-off sleep in the 429 branch below

from propapis import PropAPIS, APIError

api = PropAPIS(api_key='your_api_key')

try:
    prop = api.platforms.zillow.get_property(address)
except APIError as e:
    # Branch on the HTTP-style status code carried by the error.
    if e.status_code == 404:
        print(f"Property not found: {address}")
    elif e.status_code == 429:
        print("Rate limit exceeded, retrying...")
        # Only the back-off happens here; the request itself is not re-sent
        # by this snippet (get_property_with_retry shows a full retry loop).
        time.sleep(60)
    else:
        print(f"API error: {e}")
Retry Logic
Implement exponential backoff:
import time
def get_property_with_retry(address, max_retries=3):
    """Fetch a property, retrying with exponential backoff on APIError.

    Args:
        address: Address string passed to ``get_property``.
        max_retries: Total number of attempts (must be at least 1).

    Returns:
        Whatever ``api.platforms.zillow.get_property`` returns.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (previously this
            silently returned None without making any request).
        APIError: Re-raised from the final failed attempt.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    last_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            return api.platforms.zillow.get_property(address)
        except APIError:
            if attempt == last_attempt:
                raise
            # Waits double each time: 1s, 2s, 4s, ...  With the default of
            # three attempts only the 1s and 2s waits occur, because the
            # final failure is re-raised instead of slept on.
            time.sleep(2 ** attempt)
Data Validation
Validate Property Data
Check data quality before using:
def validate_property(prop):
    """Check a property's key fields and report any problems.

    Args:
        prop: Object exposing ``price``, ``bedrooms``, ``sqft`` and
            ``address`` attributes.

    Returns:
        True when no problems were found; otherwise prints the problems
        and returns False.
    """
    errors = []

    if not prop.price or prop.price <= 0:
        errors.append("Invalid price")

    # A bedroom count of 0 is legitimate (e.g. a studio), so only a missing
    # or negative value is rejected.
    if prop.bedrooms is None or prop.bedrooms < 0:
        errors.append("Invalid bedrooms")

    # sqft is optional: it is only checked when a non-zero value is present.
    if prop.sqft and prop.sqft < 100:
        errors.append("Suspiciously small sqft")

    if not errors:
        return True

    print(f"Validation errors for {prop.address}:")
    for error in errors:
        print(f" - {error}")
    return False
# Fetch a property and continue only when it passes validation.
prop = api.platforms.zillow.get_property(address)
if not validate_property(prop):
    pass
else:
    # Process property
    pass
Data Cleaning
Clean and normalize data:
def clean_property_data(prop):
    """Build a normalized dict from a property object.

    Args:
        prop: Object exposing ``address``, ``price``, ``bedrooms``,
            ``bathrooms`` and ``sqft`` attributes.

    Returns:
        A dict with the stripped address and numeric fields coerced to
        ``int``/``float`` (None when unavailable).  ``price_per_sqft`` is
        added only when both price and sqft are non-zero.
    """
    cleaned = {
        'address': prop.address.strip(),
        'price': int(prop.price) if prop.price else None,
        # 0 bedrooms is a real value (studio), so only None maps to None.
        'bedrooms': int(prop.bedrooms) if prop.bedrooms is not None else None,
        'bathrooms': float(prop.bathrooms) if prop.bathrooms else None,
        'sqft': int(prop.sqft) if prop.sqft else None,
    }

    # Calculate derived fields; the truthiness test also guards against
    # dividing by a zero sqft.
    if cleaned['price'] and cleaned['sqft']:
        cleaned['price_per_sqft'] = cleaned['price'] / cleaned['sqft']

    return cleaned
Batch Processing
Process Multiple Properties
Efficiently process large datasets:
def process_properties_batch(addresses):
    """Fetch, validate and clean each address, collecting the cleaned dicts.

    Addresses whose lookup or processing raises are reported with a printed
    message and skipped, so one failure does not abort the batch.  Returns
    the list of cleaned records for the properties that passed validation.
    """
    results = []
    for position, address in enumerate(addresses, start=1):
        try:
            prop = api.platforms.zillow.get_property(address)
            if validate_property(prop):
                results.append(clean_property_data(prop))

            # Progress indicator, emitted after every tenth address.
            if position % 10 == 0:
                print(f"Processed {position}/{len(addresses)}")
        except Exception as e:
            print(f"Error processing {address}: {e}")
    return results
# Process list (`...` is a placeholder for the remaining address strings)
addresses = ['123 Main St, Austin, TX', ...]
results = process_properties_batch(addresses)
Parallel Processing
Speed up with concurrent requests (keep the worker count low enough to stay within your rate limit):
from concurrent.futures import ThreadPoolExecutor
def fetch_property(address):
    """Look up one address, returning None (after printing) on any error."""
    try:
        result = api.platforms.zillow.get_property(address)
    except Exception as e:
        print(f"Error: {address} - {e}")
        result = None
    return result
# Run the lookups on a pool of five worker threads.
addresses = ['addr1', 'addr2', 'addr3', ...]
with ThreadPoolExecutor(max_workers=5) as executor:
    properties = list(executor.map(fetch_property, addresses))

# Keep only the lookups that produced a result.
properties = list(filter(lambda p: p is not None, properties))
Data Export
Export to CSV
import csv
def export_to_csv(properties, filename):
    """Write selected property fields to a CSV file with a header row.

    Args:
        properties: Iterable of objects exposing the attributes named in
            ``fieldnames`` below.
        filename: Path of the file to create or overwrite.
    """
    fieldnames = [
        'address', 'price', 'bedrooms', 'bathrooms',
        'sqft', 'zestimate', 'rent_zestimate',
    ]
    # newline='' leaves line endings to the csv module; the explicit UTF-8
    # encoding keeps the output independent of the platform default.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        # One row per property, built from the same field list as the header
        # so the two cannot drift apart.
        writer.writerows(
            {name: getattr(prop, name) for name in fieldnames}
            for prop in properties
        )
# Export the property list to properties.csv
export_to_csv(properties, 'properties.csv')
Export to JSON
import json
def export_to_json(properties, filename):
    """Clean every property and write the resulting list to a JSON file.

    Each property is passed through clean_property_data before the file is
    opened; the list is then serialized with two-space indentation.
    """
    records = list(map(clean_property_data, properties))
    with open(filename, 'w') as out:
        json.dump(records, out, indent=2)
# Export the property list to properties.json
export_to_json(properties, 'properties.json')
Performance Optimization
Caching
Cache frequently accessed data:
from functools import lru_cache
import hashlib
@lru_cache(maxsize=1000)
def get_property_cached(address):
    """Look up a property, memoizing up to 1000 results by address string.

    The cache key is the exact ``address`` argument, so differently written
    strings for the same place are cached separately.
    """
    zillow = api.platforms.zillow
    return zillow.get_property(address)
# Use cached version: repeated calls with the same address string reuse the stored result
prop = get_property_cached('123 Main St, Austin, TX')
Rate Limit Management
Respect rate limits:
import time
def rate_limited_requests(addresses, requests_per_minute=100):
    """Yield one property per address, pausing between requests.

    This is a generator: nothing is requested until it is iterated.

    Args:
        addresses: Iterable of address strings.
        requests_per_minute: Target request rate; the pause between two
            consecutive requests is ``60 / requests_per_minute`` seconds.

    Yields:
        The result of ``api.platforms.zillow.get_property`` for each address,
        in input order.
    """
    delay = 60.0 / requests_per_minute
    for index, address in enumerate(addresses):
        # Pause before every request except the first, so no time is wasted
        # sleeping after the final item has already been delivered.
        if index:
            time.sleep(delay)
        yield api.platforms.zillow.get_property(address)
# Consume the throttled generator one property at a time.
for prop in rate_limited_requests(addresses=addresses):
    process(prop)
Quick Start
from propapis import PropAPIS

api = PropAPIS(api_key='your_api_key')

# Basic usage with error handling
try:
    prop = api.platforms.zillow.get_property('123 Main St, Austin, TX')

    # Validate data: a price is required, while a bedroom count of 0 is a
    # legitimate value (studio), so bedrooms is only checked against None.
    if prop.price and prop.bedrooms is not None:
        print(f"{prop.address} - ${prop.price:,}")
    else:
        print("Incomplete data")
except Exception as e:
    # Top-level boundary of the example: report any failure, do not crash.
    print(f"Error: {e}")
For complete API documentation, see our API Reference.