Geocoding Reference#
This demo is a brief overview of the documentation - Advanced Geocoding.
import shapely
from IPython.display import display, Markdown
from lets_plot import *
from lets_plot.geo_data import *
The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).
LetsPlot.setup_html()
def run_catching(f):
    """Call ``f``, which is expected to raise, and show the error text in red.

    The exception message is rendered as red Markdown via ``display``.
    If ``f`` completes without raising, the text 'Error expected' is
    rendered the same way instead.

    :param f: zero-argument callable to invoke.
    """
    def colored(s):
        # Wrap the text in a red <span> so it stands out in the cell output.
        return Markdown('<span style="color: #ff0000">{}</span>'.format(s))

    try:
        f()
    except Exception as e:
        display(colored('{}'.format(e)))
    else:
        # Explicit branch instead of `assert False` inside the `try`:
        # the assert was caught by the handler above anyway, and it is
        # stripped under `python -O`, which silently dropped this notice.
        display(colored('Error expected'))
Basic usage#
# Level autodetection
geocode(names=['florida', 'tx']).get_geocodes()
# Result may contain the following columns:
# id - for internal use. Only in geocodes DataFrame.
# request - column with lowest administrative level. The request belongs to this level. Can be a city/county/state/country.
# parents - if provided. Can be a county/state/country.
# found name - name that was found by geocoding.
# geometry - only in geometry DataFrame.
# Thanks to the request column, it's obvious that the level was detected as state.
| id | state | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 162050 | florida | Florida | [-81.664617414276, 28.0571937561035] | [-87.6348964869976, 25.1162923872471, -80.0309... | [-87.6348964869976, 24.5230695605278, -80.0309... |
| 1 | 114690 | tx | Texas | [-99.6829525269137, 31.1685702949762] | [-106.645845472813, 25.8370596170425, -93.5078... | [-106.645845472813, 25.8370596170425, -93.5078... |
# Explicit level
geocode_states(['florida', 'tx']).get_geocodes()
| id | state | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 162050 | florida | Florida | [-81.664617414276, 28.0571937561035] | [-87.6348964869976, 25.1162923872471, -80.0309... | [-87.6348964869976, 24.5230695605278, -80.0309... |
| 1 | 114690 | tx | Texas | [-99.6829525269137, 31.1685702949762] | [-106.645845472813, 25.8370596170425, -93.5078... | [-106.645845472813, 25.8370596170425, -93.5078... |
# Parameters can be changed between searches
florida = geocode_states('florida')
display(florida.countries('usa').get_geocodes())
display(florida.countries('uruguay').get_geocodes())
display(florida.countries(None).get_geocodes())
| id | state | found name | country | centroid | position | limit | |
|---|---|---|---|---|---|---|---|
| 0 | 162050 | florida | Florida | usa | [-81.664617414276, 28.0571937561035] | [-87.6348964869976, 25.1162923872471, -80.0309... | [-87.6348964869976, 24.5230695605278, -80.0309... |
| id | state | found name | country | centroid | position | limit | |
|---|---|---|---|---|---|---|---|
| 0 | 1635164 | florida | Florida | uruguay | [-55.8642029687055, -33.7640165537596] | [-56.5363445878029, -34.4264329969883, -55.098... | [-56.5363445878029, -34.4264329969883, -55.098... |
| id | state | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 162050 | florida | Florida | [-81.664617414276, 28.0571937561035] | [-87.6348964869976, 25.1162923872471, -80.0309... | [-87.6348964869976, 24.5230695605278, -80.0309... |
Scope#
# str scope uses level autodetection.
# NB: Florida in USA is the most relevant result.
# We can't find Florida in Uruguay using only the name - parent (country or scope) is required.
geocode_states('florida').scope('uruguay').get_geocodes()
| id | state | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 1635164 | florida | Florida | [-55.8642029687055, -33.7640165537596] | [-56.5363445878029, -34.4264329969883, -55.098... | [-56.5363445878029, -34.4264329969883, -55.098... |
# Geocoder scope
uruguay = geocode_countries('uruguay')
geocode_states('florida').scope(uruguay).get_geocodes()
| id | state | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 1635164 | florida | Florida | [-55.8642029687055, -33.7640165537596] | [-56.5363445878029, -34.4264329969883, -55.098... | [-56.5363445878029, -34.4264329969883, -55.098... |
# Scope is a singleton. Collections are not allowed.
scope = ['uruguay']
run_catching(
lambda: geocode_states('florida').scope(scope).get_geocodes()
)
Unsupported ‘scope’ type. Expected ‘str’ or ‘Geocoder’ but was ‘list’
# A Geocoder with more than one entry is not allowed either.
scope = geocode_countries(['uruguay', 'usa'])
run_catching(
lambda: geocode_states('florida').scope(scope).get_geocodes()
)
‘scope’ has 2 entries, but expected to have exactly 1
# str scope can be ambiguous.
run_catching(
lambda: geocode_cities('worcester').scope('worcester county').get_geocodes()
)
# Let's geocode Worcester County the way the service does - using level detection without parents.
# In fact Worcester County was found, but the error message in case of parent ambiguity is not clear. We will improve it.
run_catching(
lambda: geocode_counties('worcester county').get_geocodes()
)
Region is not found: worcester county
Multiple objects (2) were found for worcester county:
Worcester County (United States, Maryland)
Worcester County (United States, Massachusetts)
Parents#
# Parents should have same length as names
geocode_cities(['warwick', 'worcester'])\
.counties(['Worth County', 'worcester county'])\
.states(['georgia', 'massachusetts'])\
.get_geocodes()
| id | city | found name | county | state | centroid | position | limit | |
|---|---|---|---|---|---|---|---|---|
| 0 | 119776 | warwick | Warwick | Worth County | georgia | [-83.9205776783726, 31.8303624540567] | [-83.9291015267372, 31.8222776055336, -83.9120... | [-83.9291015267372, 31.8222776055336, -83.9120... |
| 1 | 158851900 | worcester | Worcester | worcester county | massachusetts | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
# Parents can contain None items (e.g., countries with different administrative divisions).
geocode_cities(['warwick', 'worcester'])\
.states(['Georgia', None])\
.countries(['USA', 'United Kingdom'])\
.get_geocodes()
| id | city | found name | state | country | centroid | position | limit | |
|---|---|---|---|---|---|---|---|---|
| 0 | 119776 | warwick | Warwick | Georgia | USA | [-83.9205776783726, 31.8303624540567] | [-83.9291015267372, 31.8222776055336, -83.9120... | [-83.9291015267372, 31.8222776055336, -83.9120... |
| 1 | 20971097 | worcester | Worcester | None | United Kingdom | [-2.2095610731112, 52.1965283900499] | [-2.2632023692131, 52.1616362035275, -2.157303... | [-2.2632023692131, 52.1616362035275, -2.157303... |
# Geocoder object can be used as parent. Number of entries should be same as the number of names.
s = geocode_states(['vermont', 'georgia']).scope('usa')
display(s.get_geocodes())
# NB: Parent request will be present in result as a column.
display(geocode_cities(['worcester', 'warwick']).states(s).get_geocodes())
| id | state | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 60759 | vermont | Vermont | [-72.772353529363, 43.8718488067389] | [-73.4377402067184, 42.7269606292248, -71.4653... | [-73.4377402067184, 42.7269606292248, -71.4653... |
| 1 | 161957 | georgia | Georgia | [-83.2514879869572, 32.6792977005243] | [-85.6052421033382, 30.3557570278645, -80.8400... | [-85.6052421033382, 30.3557570278645, -80.8400... |
| id | city | found name | state | centroid | position | limit | |
|---|---|---|---|---|---|---|---|
| 0 | 8898137 | worcester | Worcester | vermont | [-72.5724501055639, 44.4132962822914] | [-72.6543393731117, 44.3454243242741, -72.4935... | [-72.6543393731117, 44.3454243242741, -72.4935... |
| 1 | 119776 | warwick | Warwick | georgia | [-83.9205776783726, 31.8303624540567] | [-83.9291015267372, 31.8222776055336, -83.9120... | [-83.9291015267372, 31.8222776055336, -83.9120... |
# counties and states can be combined with scope. scope acts as a top level parent.
geocode_counties(['Dakota County', 'Nevada County']).states(['NE', 'AR']).scope('USA').get_geocodes()
| id | county | found name | state | centroid | position | limit | |
|---|---|---|---|---|---|---|---|
| 0 | 1425447 | Dakota County | Dakota County | NE | [-96.5715826334556, 42.4019493162632] | [-96.7274482548237, 42.2765184938908, -96.3566... | [-96.7274482548237, 42.2765184938908, -96.3566... |
| 1 | 1826825 | Nevada County | Nevada County | AR | [-93.2913903139467, 33.6979349702597] | [-93.4838207066059, 33.4403765201569, -93.1042... | [-93.4838207066059, 33.4403765201569, -93.1042... |
# scope can't be combined with countries - geocoding won't try to guess what level it is
run_catching(
lambda: geocode_counties('Nevada County').countries('usa').scope('Arizona').get_geocodes()
)
Invalid request: countries and scope can’t be used simultaneously
# Parents and names should have same length
run_catching(
lambda: geocode_states(['florida', 'rivera']).countries('uruguay').get_geocodes()
)
Invalid request: countries count(1) != names count(2)
# Same for Geocoder
countries = geocode_countries('uruguay')
run_catching(
lambda: geocode_states(['florida', 'rivera']).countries(countries).get_geocodes()
)
Invalid request: countries count(1) != names count(2)
Ignoring an ambiguity#
# Ambiguous result generates an error:
run_catching(
lambda: geocode_cities(['warwick', 'worcester']).get_geocodes()
)
Multiple objects (15) were found for warwick:
Warwick (United States, Georgia, Worth County)
Warwick (United States, New York, Orange County)
Warwick (United Kingdom, England, West Midlands, Warwickshire)
Warwick (United States, North Dakota, Benson County)
Warwick (United States, Oklahoma, Lincoln County)
Warwick (Canada, Ontario, Southwestern Ontario, Lambton County)
Warwick (United States, Massachusetts, Franklin County)
Warwick (United States, Rhode Island, Kent County)
Warwick (Canada, Arthabaska, Québec, Centre-du-Québec)
Warwick (Australia, Queensland)
Multiple objects (5) were found for worcester:
Worcester (United States, Vermont, Washington County)
Worcester (United Kingdom, England, West Midlands, Worcestershire)
Worcester (South Africa, Western Cape, Cape Winelands District Municipality)
Worcester (United States, Massachusetts, Worcester County)
Worcester Township (United States, Pennsylvania, Montgomery County)
# Ambiguous result can be converted to a matching result (e.g., for drawing on a map)
geocode_cities(['warwick', 'worcester']).allow_ambiguous().get_geocodes()
| id | city | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 119776 | warwick | Warwick | [-83.9205776783726, 31.8303624540567] | [-83.9291015267372, 31.8222776055336, -83.9120... | [-83.9291015267372, 31.8222776055336, -83.9120... |
| 1 | 176086 | warwick | Warwick | [-74.3590787617065, 41.2538411468267] | [-74.374563395977, 41.2334154546261, -74.33202... | [-74.374563395977, 41.2334154546261, -74.33202... |
| 2 | 176448 | warwick | Warwick | [-1.58227695103754, 52.3015402257442] | [-1.78017809987068, 52.2137045860291, -1.40608... | [-1.78017809987068, 52.2137045860291, -1.40608... |
| 3 | 181594 | warwick | Warwick | [-98.7057320814883, 47.8541030734777] | [-98.7164886295795, 47.8475135564804, -98.6948... | [-98.7164886295795, 47.8475135564804, -98.6948... |
| 4 | 184249 | warwick | Warwick | [-96.9995924696813, 35.6883452832699] | [-97.0261216163635, 35.6740544736385, -96.9776... | [-97.0261216163635, 35.6740544736385, -96.9776... |
| 5 | 4072420 | warwick | Warwick | [-81.8960721893947, 43.0157359689474] | [-82.0060113072395, 42.9303230345249, -81.7887... | [-82.0060113072395, 42.9303230345249, -81.7887... |
| 6 | 158818247 | warwick | Warwick | [-72.3365538645007, 42.667919844389] | [-72.4120393395424, 42.6094262301922, -72.2719... | [-72.4120393395424, 42.6094262301922, -72.2719... |
| 7 | 158863860 | warwick | Warwick | [-71.4332938210472, 41.715542525053] | [-71.5189133584499, 41.6628210246563, -71.3564... | [-71.5189133584499, 41.6293966770172, -71.3564... |
| 8 | 159726256 | warwick | Warwick | [-72.0051031618881, 45.952380001545] | [-72.0792764425278, 45.8764761686325, -71.9089... | [-72.0792764425278, 45.8764761686325, -71.9089... |
| 9 | 1817489924 | warwick | Warwick | [152.032703831792, -28.2163204997778] | [152.023720443249, -28.224236369133, 152.04168... | [152.023720443249, -28.224236369133, 152.04168... |
| 10 | 3049373 | warwick | Warwick Township | [-75.757813608352, 40.1801763474941] | [-75.8212745189667, 40.1465494930744, -75.6930... | [-75.8212745189667, 40.1465494930744, -75.6930... |
| 11 | 3521480 | warwick | Warwick Township | [-75.0764330968138, 40.2491855621338] | [-75.1225188374519, 40.2152167260647, -75.0345... | [-75.1225188374519, 40.2152167260647, -75.0345... |
| 12 | 9244563 | warwick | Warwick Mountain | [-63.3714760496144, 45.5978938937187] | [-63.4091444313526, 45.5644172430038, -63.3474... | [-63.4091444313526, 45.5644172430038, -63.3474... |
| 13 | 158903676 | warwick | West Warwick | [-71.5257788638961, 41.6969098895788] | [-71.5342850983143, 41.6620793938637, -71.4839... | [-71.5342850983143, 41.6620793938637, -71.4839... |
| 14 | 7997266 | warwick | Sainte-Élizabeth-de-Warwick | [-72.1010115992802, 45.9195195883512] | [-72.1493585407734, 45.8681344985962, -72.0435... | [-72.1493585407734, 45.8681344985962, -72.0435... |
| 15 | 8898137 | worcester | Worcester | [-72.5724501055639, 44.4132962822914] | [-72.6543393731117, 44.3454243242741, -72.4935... | [-72.6543393731117, 44.3454243242741, -72.4935... |
| 16 | 20971097 | worcester | Worcester | [-2.2095610731112, 52.1965283900499] | [-2.2632023692131, 52.1616362035275, -2.157303... | [-2.2632023692131, 52.1616362035275, -2.157303... |
| 17 | 30670038 | worcester | Worcester | [19.4459268450737, -33.6462374031544] | [19.4369441270828, -33.6537154018879, 19.45490... | [19.4369441270828, -33.6537154018879, 19.45490... |
| 18 | 158851900 | worcester | Worcester | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
| 19 | 3076291 | worcester | Worcester Township | [-75.3438698875367, 40.1926231384277] | [-75.4107637703419, 40.1558580994606, -75.2932... | [-75.4107637703419, 40.1558580994606, -75.2932... |
# Missing name gives an error
run_catching(
lambda: geocode_cities(names=['paris', 'worcester', 'foo']).get_geocodes()
)
# Missing parent also gives an error
run_catching(
lambda: geocode_cities('paris').countries('foo').get_geocodes()
)
No objects were found for foo.
Region is not found: foo
# ignore_not_found() - ignore unknown names, keep everything else, including ambiguous names
run_catching(
lambda: geocode_cities(['paris', 'worcester', 'foo']).ignore_not_found().get_geocodes()
)
# Missing parent also gives an error
run_catching(
lambda: geocode_cities(['paris', 'worcester']).countries(['foo', None]).ignore_not_found().get_geocodes()
)
Multiple objects (5) were found for worcester:
Worcester (United States, Vermont, Washington County)
Worcester (United Kingdom, England, West Midlands, Worcestershire)
Worcester (South Africa, Western Cape, Cape Winelands District Municipality)
Worcester (United States, Massachusetts, Worcester County)
Worcester Township (United States, Pennsylvania, Montgomery County)
Multiple objects (5) were found for worcester:
Worcester (United States, Vermont, Washington County)
Worcester (United Kingdom, England, West Midlands, Worcestershire)
Worcester (South Africa, Western Cape, Cape Winelands District Municipality)
Worcester (United States, Massachusetts, Worcester County)
Worcester Township (United States, Pennsylvania, Montgomery County)
# ignore_all_errors() - keep only exactly matched names
geocode_cities(['paris', 'worcester', 'foo']).ignore_all_errors().get_geocodes()
geocode_cities(['paris', 'worcester']).countries(['france', 'foo']).ignore_all_errors().get_geocodes()
| id | city | found name | country | centroid | position | limit | |
|---|---|---|---|---|---|---|---|
| 0 | 17807753 | paris | Paris | france | [2.32002815231681, 48.8587861508131] | [2.22412258386612, 48.8155750930309, 2.4697606... | [2.22412258386612, 48.8155750930309, 2.4697606... |
# ignore_not_found() + allow_ambiguous() - see all ambiguous names without "not found" error
geocode_cities(['paris', 'worcester', 'foo']).ignore_not_found().allow_ambiguous().get_geocodes()
| id | city | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 17807753 | paris | Paris | [2.32002815231681, 48.8587861508131] | [2.22412258386612, 48.8155750930309, 2.4697606... | [2.22412258386612, 48.8155750930309, 2.4697606... |
| 1 | 8898137 | worcester | Worcester | [-72.5724501055639, 44.4132962822914] | [-72.6543393731117, 44.3454243242741, -72.4935... | [-72.6543393731117, 44.3454243242741, -72.4935... |
| 2 | 20971097 | worcester | Worcester | [-2.2095610731112, 52.1965283900499] | [-2.2632023692131, 52.1616362035275, -2.157303... | [-2.2632023692131, 52.1616362035275, -2.157303... |
| 3 | 30670038 | worcester | Worcester | [19.4459268450737, -33.6462374031544] | [19.4369441270828, -33.6537154018879, 19.45490... | [19.4369441270828, -33.6537154018879, 19.45490... |
| 4 | 158851900 | worcester | Worcester | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
| 5 | 3076291 | worcester | Worcester Township | [-75.3438698875367, 40.1926231384277] | [-75.4107637703419, 40.1558580994606, -75.2932... | [-75.4107637703419, 40.1558580994606, -75.2932... |
# Only the first 10 distinct not-found names are listed
run_catching(
lambda: geocode_cities(['foo', 'foo', 'foo', 'foo4', 'foo5', 'foo6', 'foo7', 'foo8', 'foo9', 'foo10', 'foo11', 'foo12', 'foo13', 'foo14', 'foo15']).get_geocodes()
)
No objects were found for foo, foo4, foo5, foo6, foo7, foo8, foo9, foo10, foo11, foo12 and (3) more.
# Empty DataFrame if no matching names left
geocode_cities('worcester').ignore_all_errors().get_geocodes()
| id | city | found name | centroid | position | limit |
|---|
where() function#
# Take object closest to a place.
boston = geocode_cities('boston')
geocode_cities('worcester').where('worcester', closest_to=boston).get_geocodes()
| id | city | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 158851900 | worcester | Worcester | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
# Take object closest to a coordinate.
boston_coord = boston.get_centroids().geometry[0]
geocode_cities('worcester').where('worcester', closest_to=boston_coord).get_geocodes()
| id | city | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 158851900 | worcester | Worcester | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
# Or take the object within a rectangular area.
# shapely.geometry.box() takes (minx, miny, maxx, maxy), so the western
# longitude (-72) goes first and the eastern one (-71) third.
geocode_cities('worcester')\
    .where('worcester', scope=shapely.geometry.box(-72.00, 42.00, -71.00, 43.00))\
    .get_geocodes()
| id | city | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 158851900 | worcester | Worcester | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
# Or by defining a query scope. In this case name from the scope will not go into the result DataFrame
massachusetts = geocode_states('massachusetts')
geocode_cities('worcester').where('worcester', scope=massachusetts).get_geocodes()
| id | city | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 158851900 | worcester | Worcester | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
# Query scope also can be a string
geocode_cities('worcester').where('worcester', scope='massachusetts').get_geocodes()
| id | city | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 158851900 | worcester | Worcester | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
# Query scope overrides parents while keeping parents in a result dataframe.
worcester_county=geocode_counties('Worcester County').states('massachusetts').countries('usa')
geocode_cities(['worcester', 'worcester'])\
.countries(['USA', 'United Kingdom'])\
.where('worcester', country='USA', scope=worcester_county)\
.get_geocodes()
| id | city | found name | country | centroid | position | limit | |
|---|---|---|---|---|---|---|---|
| 0 | 158851900 | worcester | Worcester | USA | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
| 1 | 20971097 | worcester | Worcester | United Kingdom | [-2.2095610731112, 52.1965283900499] | [-2.2632023692131, 52.1616362035275, -2.157303... | [-2.2632023692131, 52.1616362035275, -2.157303... |
# Query scope should contain single object
countries = geocode_countries(['usa', 'uruguay'])
run_catching(
lambda: geocode_states('florida').where('florida', scope=countries).get_geocodes()
)
‘scope’ has 2 entries, but expected to have exactly 1
# NB: Parent is used only for searching exact row in request.
# It doesn't modify any parent (neither existing nor empty).
run_catching(
lambda: geocode_cities('worcester')\
.countries('USA')\
.where('worcester', country='USA', state='iowa', county='worcester county')\
.get_geocodes()
)
worcester(county=worcester county, state=iowa, country=USA) is not found in names
geocode_cities('warwick') \
.where('warwick', scope=shapely.geometry.box(-72, 41.5, -71, 42)) \
.allow_ambiguous() \
.get_geocodes()
| id | city | found name | centroid | position | limit | |
|---|---|---|---|---|---|---|
| 0 | 158863860 | warwick | Warwick | [-71.4332938210472, 41.715542525053] | [-71.5189133584499, 41.6628210246563, -71.3564... | [-71.5189133584499, 41.6293966770172, -71.3564... |
| 1 | 158903676 | warwick | West Warwick | [-71.5257788638961, 41.6969098895788] | [-71.5342850983143, 41.6620793938637, -71.4839... | [-71.5342850983143, 41.6620793938637, -71.4839... |
Error handling#
# Failed to find parent
run_catching(
lambda: geocode_states('florida').countries('foo').get_geocodes()
)
Region is not found: foo
# ambiguous parent - Worcester County. Better message required.
run_catching(
lambda : geocode_cities('worcester').counties('worcester county').scope('usa').get_geocodes()
)
Region is not found: worcester county
# No us-48 at non-state level
run_catching(
lambda: geocode_counties('us-48').get_geocodes()
)
Invalid level for request with us-48. Should be STATE, but was COUNTY.
Geocoding and geoms#
cities = geocode_cities(['boston', 'new york'])
p = ggplot() + ggsize(300, 200)
# geocoder object can be used as map parameter to simply display a geometry
gggrid([
p + geom_map(map=cities, fill='gray') + ggtitle('geom_map()'),
p + geom_rect(map=cities, fill='gray') + ggtitle('geom_rect()'),
p + geom_point(map=cities) + ggtitle('geom_point()'),
]) + ggsize(900, 200)
# A GeoDataFrame can also be used as the map parameter to display a geometry - the syntax is the same as with a Geocoder.
# It is useful for optimisation - the geocoder caches geocodes, but doesn't cache geometries,
# so each GeoDataFrame is fetched once here and reused in the layers below.
centroids = cities.get_centroids()
bboxes = cities.get_limits()
boundaries = cities.get_boundaries()
p = ggplot()
gggrid([
    p + geom_map(map=boundaries, fill='gray') + ggtitle('geom_map()'),
    p + geom_rect(map=bboxes, fill='gray') + ggtitle('geom_rect()'),
    p + geom_point(map=centroids) + ggtitle('geom_point()'),
]) + ggsize(900, 200)
map and map_join#
# map_join allows to join data and geometry.
# To make this example more complex, the demo data contains cities with the same name (Worcester).
# Also there is a city and a state with the same name (New York).
# All names in the DataFrame are in lower case to distinguish user input from the geocoding result.
import pandas
d = pandas.DataFrame({
'City_Name': ['boston', 'new york', 'worcester', 'worcester'],
'State_Name': ['massachusetts', 'new york', 'vermont', 'massachusetts'],
'mean': [523, 556, 600, 533]
})
geocoder = geocode_cities(d.City_Name).states(d.State_Name)
geocoder.get_geocodes()
| id | city | found name | state | centroid | position | limit | |
|---|---|---|---|---|---|---|---|
| 0 | 158809705 | boston | Boston | massachusetts | [-71.0884755326693, 42.3110405355692] | [-71.1912493407726, 42.2279115021229, -71.0096... | [-71.1912493407726, 42.2279115021229, -70.9244... |
| 1 | 61785451 | new york | New York | new york | [-73.8673749469137, 40.6847005337477] | [-74.0417455136776, 40.5700233578682, -73.7001... | [-74.2556785047054, 40.4960802197456, -73.7001... |
| 2 | 8898137 | worcester | Worcester | vermont | [-72.5724501055639, 44.4132962822914] | [-72.6543393731117, 44.3454243242741, -72.4935... | [-72.6543393731117, 44.3454243242741, -72.4935... |
| 3 | 158851900 | worcester | Worcester | massachusetts | [-71.8154652712922, 42.2678737342358] | [-71.8840424716473, 42.2100399434566, -71.7312... | [-71.8840424716473, 42.2100399434566, -71.7312... |
# Cache boundaries
background_states = geocode_states(['massachusetts', 'new york', 'vermont']).inc_res().get_boundaries()
def draw_plot(map, map_join):
    """Plot the cities from ``d`` as points on top of ``background_states``.

    :param map: passed through as the ``map`` argument of ``geom_point``.
    :param map_join: passed through as the ``map_join`` argument of ``geom_point``.
    :return: the composed plot with ``theme_void()`` applied.
    """
    # Background layer: the state boundaries fetched once at module level.
    states_layer = geom_map(map=background_states)
    # Data layer: point size follows 'mean', color follows 'City_Name'.
    cities_layer = geom_point(
        aes(size='mean', color='City_Name'),
        data=d,
        map=map,
        map_join=map_join,
    )
    return ggplot() + states_layer + cities_layer + theme_void()
# Draw a GeoDataFrame with a data.
# Names in GeoDataFrame from Geocoder are predefined: 'city', 'county', 'state', 'country'
# Order of levels in map_join should match:
draw_plot(map=geocoder.get_centroids(), map_join=[['City_Name', 'State_Name'], ['city', 'state']])
# Note that both Worcesters have the proper position and data, but they share the same color.
# To make the color distinct a new column with combination of city and state names can be used.
# With Geocoder or GeoDataFrame returned by Geocoder we can omit generated map names.
# Only data columns have to be defined. Map columns will be generated with the following order:
# - city, county, state, country.
# Levels that were not requested will be omitted.
# Data columns should exactly follow this order.
draw_plot(map=geocoder, map_join=[['City_Name', 'State_Name']])
# Not following the order leads to an unexpected result:
draw_plot(map=geocoder, map_join=[['State_Name', 'City_Name']])
us48 = geocode_states('us-48').inc_res()
p = ggplot() + \
theme_void() + theme(legend_position='none') + \
ggsize(600, 300)
# Geocoder can be passed to a data parameter. In this case column 'found name' can be used for join and styling:
p + geom_map(aes(fill='found name'),
data=us48,
map=us48, map_join='found name',
tooltips=layer_tooltips().line('@{found name}'))
# With GeoDataFrame as data a plot spec is even more compact
p + geom_map(aes(fill='found name'), data=us48.get_boundaries(), tooltips=layer_tooltips().line('@{found name}'))
# map_join works fine even when data and map rows don't match
# For simplicity I'll re-use states from us-48. Names can be provided by the user.
import random
random.seed(1)
area_of_interest = us48.get_geocodes().state.tolist()
length = 30
mean_by_state = {
'State_Name': random.sample(area_of_interest, length),
'Mean_Value': random.sample(range(0, 500), length)
}
p + geom_map(
aes(fill='Mean_Value'),
data=mean_by_state, map=us48,
map_join='State_Name',
tooltips=layer_tooltips()
.line('@{found name}')
.line('mean:|@Mean_Value')
)
# Note the variable 'found name' that is used in tooltip.
# Thanks to map_join this variable is available for tooltip processor.