This HTML5 document contains 528 embedded RDF statements represented using HTML+Microdata notation.

The embedded RDF content will be recognized by any processor of HTML5 Microdata.

Prefix  Namespace IRI
xhv  http://www.w3.org/1999/xhtml/vocab#
n14  https://discourse.looker.com/t/fun-with-data-calculating-gender-balance-using-first-names/2526.
rdf  http://www.w3.org/1999/02/22-rdf-syntax-ns#
n10  http://ods-qa.openlinksw.com:8896/proxy-iri/
n19  https://discourse.looker.com/u/
n23  https://fonts.googleapis.com/css?family=Google+Sans:400,500|Roboto:400,400italic,500,500italic,700,700italic|Roboto+Mono:400,500,700|Material+
n17  http://ods-qa.openlinksw.com:8896/about/id/https/discourse.looker.com/t/fun-with-data-calculating-gender-balance-using-first-names/
void  http://rdfs.org/ns/void#
n6  http://ods-qa.openlinksw.com:8896/about/id/entity/https/discourse.looker.com/t/fun-with-data-calculating-gender-balance-using-first-names/
schema  http://schema.org/
n20  https://fonts.googleapis.com/css?family=Open+Sans:300,300italic,400,400italic,500,500italic,600,600italic,700,
n22  https://discourse.looker.com/opensearch.
owl  http://www.w3.org/2002/07/owl#
n9  https://discourse.looker.com/stylesheets/desktop_theme_17_30f7f8cb3659ef54811eea52ecc4f61b0e9d9550.css?__ws=discourse.looker.
n16  twitter:
md  http://www.w3.org/1999/xhtml/microdata#
n2  https://discourse.looker.com/t/fun-with-data-calculating-gender-balance-using-first-names/
n12  https://discourse.looker.com/uploads/default/optimized/2X/7/7246bd2ffbb1e2fbe30212f6832855d1b6d95756_2_32x32.
rdfs  http://www.w3.org/2000/01/rdf-schema#
awol  http://bblfish.net/work/atom-owl/2006-06-06/#
pwdr  http://www.w3.org/2007/05/powder-s#
n4  http://ods-qa.openlinksw.com:8896/sparql/
dc  http://purl.org/dc/elements/1.1/
n21  https://discourse.looker.com/t/fun-with-data-calculating-gender-balance-using-first-names/2526#
foaf  http://xmlns.com/foaf/0.1/
n8  https://discourse.looker.com/uploads/default/optimized/2X/5/5a6904546e7c2ff87c53f993551e16ae1ecb2194_2_180x180.
n24  https://discourse.looker.com/c/data-explorers-guild/
n18  https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.
sioc  http://rdfs.org/sioc/ns#
n30  https://discourse.looker.com/stylesheets/desktop_37_14dfdd0e6cc5b515b82ba24e5498eeb2d1a9b487.css?__ws=discourse.looker.
prv  http://purl.org/net/provenance/ns#
xsdh  http://www.w3.org/2001/XMLSchema#
Subject Item
n2:2526
rdf:type
schema:CreativeWork n2:WebSite
owl:sameAs
n21:this
pwdr:describedby
n17:2526
foaf:topic
schema:articleBody schema:keywords schema:dateModified schema:itemListElement schema:position schema:author schema:name schema:mainEntityOfPage schema:datePublished schema:publisher schema: _:vb829636 _:vb829634 _:vb829635 _:vb829632 _:vb829633 _:vb829630 _:vb829631 _:vb829628 _:vb829629 _:vb829626 _:vb829627 _:vb829624 _:vb829625 _:vb829622 _:vb829623 _:vb829620 _:vb829621 _:vb829616 _:vb829617 _:vb829614 _:vb829615 _:vb829612 _:vb829613 _:vb829610 _:vb829611 _:vb829608 _:vb829609 _:vb829606 _:vb829607 _:vb829604 _:vb829605 _:vb829602 _:vb829603 _:vb829600 n21:rdf_load_htmlvxc_provenance _:vb829601 n10:06cec9d431d98a74b7e8a6a61ee2d1a25ef0638a n10:884441e39205b131ed13c84ed4c0e918643c597d _:vb829599 n10:c17e5b48b92e0299dddfe8fd0b253da5d87703b2 n10:c31457635311f40c49ce4864b40a160a747f1036 n10:0de78839bd87aed6ac883fc732cf49d6fd50da01 n10:1b6a2c679ee0ff60a5a69cdff8edad343a8a6150 n10:68f096cd24b0e423ad1491390c1c83f3b3fb1ab6 n10:50c4c58f14fc71b25a1f033a187b2810cf1e8d66 n10:417b4a046bfb4a6b5e5493fb9d8cc40176d4c86e n10:a4549329beba02d9f16d93924ccf0e1d72c77e5a n10:085430f5d7c624451f5af5114481f8940ebab17e n10:0a60fa2732889fa8bcdf977c7101c8233f3d8e15 n10:5a3bd94696057128dc666f301e7a21c7b5f08062 n10:7fb2599a3e60da111bf236d71a9307fc5c1feb7f n10:91563ae82d577f5ef31783794889f2e426233c32 n10:4283d22d5c5abd6b8d27d310f92b82a0395c4222 n10:e0470cf575d1028cf49b5458f7071779c6d7edc7 n10:d5b5b602e9c34c499183ce6b267b9f03385b1d3f n10:f6e0e688fa5db64b9e22288dbfba2c035baf182c n10:428970f3896a3ec150b658a7a82f216df07cc5be n10:3d3f880835d5a2fff4b52dca975fc23d5f2aa579 n10:63332d134df882005659bc31147bc11cdf4951c9 n10:e766861ab1fd3c7054e341a07241ef14c5f619eb n10:ab0688c94e76f0f86481ee453a3b26eeef4c01a6 n10:cc4dac7470be88e0790c768656833ec5341d5a79 n10:3dc5746322df87a2c010c89d7957269ba645a52e n10:879ecb9bc939779ee06100d7fa950d7657eeb6 n10:b3a0c782e36b256c7d733c7e58bf8c02e2da3924 n10:e3ce5e18ca492b6ea545b3d13e917417fbd48d86 n10:13c7763905fc2b19badb9864a6bf85b9d4d7bd56 
n10:33e667e8152e66adb16a502dcfc4a8a85391bfa6 n10:62c02e346e624ed5d242ec3ff6698cefb77a362c n10:5c484a3ba09fa151f7e1771800c349e309ea0bd5 n10:fdb98f0a7ad1883f8f68f16b35b9cddf245826d3 n10:819528c36f1e19862db507327b12c97b762e80e9 n10:3d6a6b3d39e8340e1743e1efed3e02fd330a6eab n10:1d926e246870b5c110a40f8c658fd3a10dac93ef n10:9d4850f819224b3402e238aa77571f09a1d33ed4 n10:c428f8e6a48edfaa909fa8f5d8e803f4465fa025 n10:32728f30017a762b4a0a5be46ecb508844818ed4 n10:8f8109efd6a8929569b6751385cebadeefa4eae5 n10:4835663300829642a6991b0a5030a3aad1c79561 n10:9dbd2993fea3c6c858541a27660e9365008f7d67 schema:url n10:740eb4d807cfb50897cc178d8b362593f57b7a78 n10:fab68f23451dfc216c34c006fe92136bed51cf3e n10:ca0f7c1cbd21c29c96a4dd8f7e74b30fe9d5b5c5 n10:a9760451a16e23ed11ad2061cb2b6beb781ea18b n10:3c813fb91ec2b200624082874dc1ee5d472cfefd n10:d75e35a355c1874189602d5cd41b8474bd9f9397 n10:171587583aebe4e10955f0db39adbc9a62a3ea43 n10:c735d1baeec4444d4991bb2f5e52b840e4b4af2b n10:7ac17a938b9f76f4fc6c9f966b4e227acc1927af n10:d093112f1deb423dfd91313d25c1d0ae463b30ed n10:cd459310ff1a308f614042241ca419938062b6ae n10:4d55df11a830f07b901b71285270b5e88fb8eed8 n10:706f02af9e65c4aa6b8f00c8565ee37c93068623 n10:4ddb7610af3e232430b3c1666abbdf5081b079af n10:7edeeb2505dd59bad363cbb8b4c3009106c2c352 n10:d040a9f736d3a191e44372eca3fcdaf18885db n10:6f87fec9fe40ab5d81517dbc98ff5bb53c0aa53f n10:51f4af3ece1b3678ac119b870feba794727b2bb4 n10:b7699a542f4ce4b2ba52ee86cd2cc0586058cda8 n10:64f6f71a38702f5748ef9f816321379db3c4e830 n10:a248342a2b389cfda2a0c3aee4a70bd69e16a6bc n10:0fab599a66f5d2699a6ae2e6e746344852cb2572 n10:6d954a014c6136028d9aff298c9db7e0c674a741 n10:4e6af2451229f09fc74a45008da4f91a2b42054e n10:4d86444a14e2ec0c5e0b3fc70bfdee2a766bce87 n10:482c655198b278fae38a74bd32a6ef8c4e4cad4a n10:fc373a14ffd27850ca9494c5f52729820a99bdac n10:52abf23b4cf2192618644d63c974c230343d8aef n10:257732165593d6687937a9fa2d2b261abe41f618 n10:a3806cad442c3a2780bceb5d02938dd805cbbf87 n10:540603eb8cbaf44fe49a4b7882689e7abf310fdb 
n10:4e0c68e391bbe8dd581e284d40fe392816d0264f n10:e411b8a7b3645d9d82dd6f41c6a671e79c1487eb n10:d2e83d21f3254fea9460bdb14b50d67c1f584165 n10:50e8283fedc1684787049e794021fbee829b44ee n10:2114dcdd1a8f99a25086cdf0d23efebfd0d81ed0 n10:e7f168dd4f399cfdfe3addf2d36927299bf151b5 n10:3e94df9849baaf2c22b556f76c6d889b83d18451 n10:b4e1c96831c56fdfbee1923ae23138dceea08743 n10:de68e132013b9b635a2786bfe99686aac2fbf86e n10:8352b1a32801557893a6011ef056864c14c6c89b n10:46a06030565b957153789eba001aa99607de6a1b n10:7a2f99b21faa3fd817ae074f41d71897b51f02cb n10:5e181f4f7a40cdee8aa49ec7c14de7f1a8ce61b9 n10:d8f70c83919498e57e24279823a999247be31897 n10:2618abb882c297232725f5a46be2ee3cb0aac641 sioc: n10:c9fa500058e6814e455585a4b1cb1f616c536bb3 n10:8b7df625a8587a0da322bd926d8dd6486b5b9436 n10:31e5978b0decd6c0a1c1643e5de0d1a18f06e5 n10:4b14088b2e069579d0cdf34a06d23f8d373a8974 n10:7e2929d7f06bdb5d6285c4e764be668c3724a903 rdf: n10:0a09988ca8530f98420001dfc22aefffc936ac09 n10:472f44e318d8354530dc0c716750a41098413ffe n10:fcf26d4b3e60b456176bc22856890cb9ecad50a1 owl: n10:d5e22ffc9a257fd4c2a48d8c35683785debe97d1 n10:51358aeb9898e906b17157fabee59676af284f20 n10:406a1dd56981506f015c855adc294dc84b68f682 n10:9eab91a09e06ddf367246a717cf70fb73802fbc5 n10:a63ca3ef63df150e614db72f1fcb3f2250853875 n10:4388c81ce4b033a5971bb19f5135cac94f5f8f91 n10:9f537d3637060625159de90f69b58af89f3e8ee8 n10:41babed6402acdd522931682a4c26353c316254c n10:ee461742028e4d3b6115b7f2ffb92b9e73e57dc5 n10:b6fbf8ac968820a115256ff9913c3bafe051bbbe n10:882524cb1e339c48385089bc872029d8d6c108e4 n10:aa51291853f6c14745278cbdf399f731d66a9e13 n10:1ec2e3ae0cceef7e3e5428f0c1d3280443b8b8c7 n10:a946c5899e9ee2b40178d9a0c86a81530fb3dc53 n10:3f7c9690855d7310f21ac2adef4a6c8d05668170 n10:fb38f449425de45728760901339e9d638c798dca n10:5140b53d3b502f6056000dc61fb3c2038078e0c5 n10:23260e35190e1caf768f8caf18589c7c3a9f0e38 n10:03fcdfafbce9deb3d8b0dc36aebdb1a361285589 schema:headline n10:725b578dbe55114943f86557599b7f9c682fe317 
n10:c0a3b0a47a4ad635158aff851fe25021674d97be n10:08c36448ebbe2d2df60d5009798be5d97110a13a n10:9aae9fa790b3b48225e624856ae01c05a3b0b27f n10:7815a4ae5f3fcecd0eefcf7e2def377d5493b156 n10:2ec407bbf622bc715ba3f91cf0db121e531ff171 n10:53c126e427d5692dfab4bed88fb924a61add4fdb n10:fd9327ce074e39565cd7d288f3a26a0fdc406ed1 n10:6810aed25bf1fe0ba2b4d513ca20f6360ba9e822 n10:57e59f4a1c0eb434a890a31845d9c89ca08f1fe7 n10:5d37c46a110f689e10d47f2a0de208f176152087 n10:ac53b5922a093686f0808c8f6e0d1e4b6e64f412 awol: n10:0786339df57a8b3894c122311313233f9513eeb2 xhv: n10:6b05b2b08c7d821d5f10230edc884cdbdaa2c063 n10:4bb29a0895dde6c3fd13ed4f678925913b9ca1d6 n10:0857974da22b5aaa20436cf80ee582b2d592e6a0 n10:f058f86968d172a7a4cd513a2f09b5b551eda5d5 n10:dff49c58719164cc36824d3a5f15a0f960aa8049 n10:34b04de390eaacfd49aa2502da978dce5b17d30a n10:4b2bf6ff3c882a56b7bfffc8e534a514743c39e5 n10:3d14fd229fc7fc51bcda6ba1fbc32f205cfa7ccb n10:973c907faeffbd73243f377c0244d45474e5065d n10:24d20e98b2ce0dc30af5be346816c9a6dafc68be n10:1632591fb8b8b08480a2a6e1b44b5a79dd08d453 n10:22a5255de7b3f5de953ab0e75889df4573eb8cc0 n10:bb807ae122765b1756a690936f578cf2a5a620d3 n10:a7592a7404056eb2ce1d5c4f62618b238627c8c8 n10:554b3f1e86e3944594b97c4c2a59cfe122dde618 n10:2998f88831b6f4f8e9d19ed82d7ce62bc823afa3 n10:312e320773ae2db38fae84a026f0f1073127fff4 n21:userInteractionCount n21:interactionStatistic n21:interactionType n10:c4a587315b944bf2d01e5e3ca6bb0d895df4a598 n10:a8dad16f9558be06eda1d93c740a4ec44aa8f65f n10:81fa68c85e69a8cf86ab31f167b2095ebcfbb7fb n10:a460208fdfe3acbf7e7e50012b585ab588a73320 n10:39199de5398ef3a7aa4811b53e29c7557a665d1a n10:adf627fe865b97f318f02ec1f4df1c78dbe737a8 n10:bdfffcbd5821ac09b0d6c12d023e4ac807461b41 n10:62169e7141915e26682153e7ea0a4a817796e8c2 n10:bcb76b6046c0c60312dddccd91ac5f32ca1ed4e9 n10:b43fc0710eb5d072ecbc008eaf4750df59d0d22d n10:50c0f301fa1ad03de29aa3f24310b56122818948 n10:4218899079d2fcead2151aa20ce3760cd9df8ad7 n10:00d1d1e04beb12927391e98fb6b9ad7c0ca950bf 
n10:56cc2062eb5189a23f006ea93ceed7a2b09371ad n10:d45ad8651b43f88f310a6c30d58a5a8a17a9e179 n10:0e3b7aad092168e700af1d52a0005b2362784d2d n10:911b8f5cbed917b8fba7e8b443b32d34da5d9d6d n10:534cddfcf61998b5d5b77c6254d6d076f4c2f04f n10:249cebd34fedb0ecdb5c7d00fd723f2f589a5fe7 n10:e0584f99975d2edccf814b96cb534e94f7db09b2 n10:2e73d276156b5fccb79eb7f3b9b9f90dbe5c98c4 n10:a4bd6656f73f4358de9707bd2283a1549bd4eb6e n10:e7eb5316d18b076458111d65c57c352e4f06298f n10:4505bf302f5f813caff5cea910750f9b95b41548 n10:dc005e361537f5f680694098db336f9f6038a695 n10:f0aaf6c02493d7c7942289f6856eb814cc844b61 n10:5353fd61c5f3e5676e4bdcf58c81aafce1b4879f n10:074ec481ed4e57106f72adee3ec8ab4450609cf9 n10:61c69c021ffce40986654a6e73de66ef01ef4082 n10:8a72ac2074d1647d2b3e108905246f20c7ef149f n10:cb27214e670c3cdeaa46b627355d055090353ac9 n10:026a3402b00c8f84aeb8229f5114c44cc1c217b6 n10:ad3df29d45a3065536bcd161f3ff05cc94e338ec n10:f7adba794ed0d211902952c9d37683918221820f n10:29938198b5f56b1eec30b72f1637daab69813117 n21:rdf_load_json n21:DataCreation n10:6d82e5614a5fe6e5fe0c786f66446885064bf37c n21:this dc: schema:item
schema:mainEntity
n6:2526
schema:potentialAction
n10:6d82e5614a5fe6e5fe0c786f66446885064bf37c
md:item
_:vb829630 _:vb829631 _:vb829628 _:vb829629 _:vb829626 _:vb829627 _:vb829624 _:vb829625 _:vb829622 _:vb829623 _:vb829620 _:vb829621 _:vb829614 _:vb829615 _:vb829612 _:vb829613 _:vb829610 _:vb829611 _:vb829608 _:vb829609 _:vb829606 _:vb829607 _:vb829604 _:vb829605 _:vb829602 _:vb829603 _:vb829600 _:vb829601 _:vb829599 _:vb829636 _:vb829634 _:vb829635 _:vb829632 _:vb829633
n16:card
summary
n16:description
In the The Name Game: Step 3 we were able to compute, given a name, the percent likelihood that the name is male. The calculation is computed by dividing the size of the male population for a given name by the total population for that name. Some Gender Neutral Names Explore From Here Building a Name Map We can use the same table in BigQuery and this simple query to build a lookup table that maps names to their likelihood of being male. If we don’t have data (later when we join and the join...
n16:image
https://discourse.looker.com/uploads/default/original/2X/5/5a6904546e7c2ff87c53f993551e16ae1ecb2194.png
n16:title
Fun with Data: Calculating Gender Balance Using First Names
n16:url
https://discourse.looker.com/t/fun-with-data-calculating-gender-balance-using-first-names/2526
void:sparqlEndpoint
n4:
schema:url
n2:2526 https://discourse.looker.com
n2:alternate
n14:rss
n2:apple-touch-icon
n8:png
n2:canonical
n2:2526
n2:description
In the The Name Game: Step 3 we were able to compute, given a name, the percent likelihood that the name is male. The calculation is computed by dividing the size of the male population for a given name by the total pop…
n2:generator
Discourse 2.6.0.beta1 - https://github.com/discourse/discourse version 957e851ffe9bf15bb4d8c6a6d4fe8ff9326f86da
n2:icon
n12:png
n2:search
n22:xml
n2:site-filter
24 2 32
n2:site-priority
2
n2:stylesheet
n9:com n18:css n20:800 n23:Icons n30:com
n2:theme-color
#ffffff
n2:viewport
width=device-width, initial-scale=1.0, minimum-scale=1.0, user-scalable=yes, viewport-fit=cover
Subject Item
n10:2e73d276156b5fccb79eb7f3b9b9f90dbe5c98c4
rdf:type
rdf:Statement
rdfs:label
Embedded JSONLD-in-HTML Statement 16
pwdr:describedby
n17:2526 n6:2526
rdf:object
n10:6d82e5614a5fe6e5fe0c786f66446885064bf37c
rdf:predicate
schema:potentialAction
rdf:subject
n2:2526
Subject Item
n10:534cddfcf61998b5d5b77c6254d6d076f4c2f04f
rdf:type
rdf:Statement
rdfs:label
Embedded JSONLD-in-HTML Statement 17
pwdr:describedby
n17:2526 n6:2526
rdf:object
https://discourse.looker.com
rdf:predicate
schema:url
rdf:subject
n2:2526
Subject Item
n10:249cebd34fedb0ecdb5c7d00fd723f2f589a5fe7
rdf:type
rdf:Statement
rdfs:label
Embedded JSONLD-in-HTML Statement 18
pwdr:describedby
n17:2526 n6:2526
rdf:object
n2:WebSite
rdf:predicate
rdf:type
rdf:subject
n2:2526
Subject Item
n10:0e3b7aad092168e700af1d52a0005b2362784d2d
rdf:type
rdf:Statement
rdfs:label
Embedded JSONLD-in-HTML Statement 19
pwdr:describedby
n17:2526 n6:2526
rdf:object
n2:2526
rdf:predicate
prv:accessedService
rdf:subject
_:vb829616
Subject Item
n10:911b8f5cbed917b8fba7e8b443b32d34da5d9d6d
rdf:type
rdf:Statement
rdfs:label
Embedded JSONLD-in-HTML Statement 20
pwdr:describedby
n17:2526 n6:2526
rdf:object
2020-10-25T02:21:01.469691
rdf:predicate
prv:performedAt
rdf:subject
_:vb829616
Subject Item
n10:56cc2062eb5189a23f006ea93ceed7a2b09371ad
rdf:type
rdf:Statement
rdfs:label
Embedded JSONLD-in-HTML Statement 21
pwdr:describedby
n17:2526 n6:2526
rdf:object
n21:spongerInstance
rdf:predicate
prv:performedBy
rdf:subject
_:vb829616
Subject Item
n10:d45ad8651b43f88f310a6c30d58a5a8a17a9e179
rdf:type
rdf:Statement
rdfs:label
Embedded JSONLD-in-HTML Statement 22
pwdr:describedby
n17:2526 n6:2526
rdf:object
prv:DataAccess
rdf:predicate
rdf:type
rdf:subject
_:vb829616
Subject Item
_:vb829599
rdf:type
http://schema.org/BreadcrumbList
pwdr:describedby
n6:2526
itemListElement
_:vb829600
Subject Item
_:vb829600
rdf:type
http://schema.org/ListItem
pwdr:describedby
n6:2526
position
1
item
n24:54
name
Data Explorers Guild
Subject Item
_:vb829601
rdf:type
http://schema.org/DiscussionForumPosting
publisher
_:vb829602
author
pwdr:describedby
n6:2526
dateModified
2019-01-16T19:07:57Z
mainEntityOfPage
n2:2526
articleBody
In the The Name Game: Step 3 we were able to compute, given a name, the percent likelihood that the name is male. The calculation is computed by dividing the size of the male population for a given name by the total population for that name. Some Gender Neutral Names Explore From Here Building a Name Map We can use the same table in BigQuery and this simple query to build a lookup table that maps names to their likelihood of being male. If we don’t have data (later when we join and the join fails), we assume a 50% probability. Old LookML - view: gender_guess derived_table: sql: | SELECT UPPER(name) AS name , FLOAT(SUM(CASE WHEN gender = 'M' THEN number ELSE 0 END)) / SUM(number) AS percentage_male FROM [fh-bigquery:popular_names.usa_1910_2013] GROUP EACH BY 1 fields: - dimension: name - dimension: percentage_male type: number sql: COALESCE(${TABLE}.percentage_male, 0.5) New LookML view: gender_guess { derived_table: { sql: SELECT UPPER(name) AS name , FLOAT(SUM(CASE WHEN gender = 'M' THEN number ELSE 0 END)) / SUM(number) AS percentage_male FROM [fh-bigquery:popular_names.usa_1910_2013] GROUP EACH BY 1 ;; } dimension: name {} dimension: percentage_male { type: number sql: COALESCE(${TABLE}.percentage_male, 0.5) ;; } } Names and likelihood they are Male The names with Percentage Male = 1 are certainly male, the names with 0 are certainly female. The fractional names are somewhere between. Explore From Here Names and the United States Patent and Trademark Office (USPTO) I recently uploaded all the USPTO data to BigQuery. The main table in this dataset is ‘case_files’ and on each case file, there is the name of the attorney assigned to the case. Attorney Names Attorneys and the number of cases they’ve worked on. Explore From Here Parsing out the First Name It appears that names are of the form <LAST_NAME>, <FIRST_NAME> <INITIAL OR NAME> We can parse the pretty easily using a regular expression. First names appear to immediately follow the comma. 
We can codify this with a new dimension and a regular expression. Old LookML - dimension: exm_attourney_first_name sql: REGEXP_EXTRACT(${exm_attorney_name}, `, (\\w+)`) New LookML dimension: exm_attourney_first_name { sql: REGEXP_EXTRACT(${exm_attorney_name}, `, (\\w+)`) ;; } And the results: Explore From Here Joining the Tables Big query doesn’t let us join on expressions, so we have to move the dimension in a derived table. BigQuery is smart enough to optimize this out if we don’t use the expression when referencing this derived table in a query. Old LookML - view: case_file derived_table: sql: | SELECT *, REGEXP_EXTRACT(exm_attorney_name, ', (\\w+)') as exm_attorney_first_name FROM trademark.case_file New LookML view: case_file { derived_table: { sql: SELECT *, REGEXP_EXTRACT(exm_attorney_name, ', (\\w+)') as exm_attorney_first_name FROM trademark.case_file ;; } } Next we join in gender_guess to the case file. Old LookML - explore: case_file joins: - join: exm_attorney_gender from: gender_guess sql_on: ${case_file.exm_attorney_first_name} = ${exm_attorney_gender.name} relationship: many_to_one New LookML explore: case_file { join: exm_attorney_gender { from: gender_guess sql_on: ${case_file.exm_attorney_first_name} = ${exm_attorney_gender.name} ;; relationship: many_to_one } } Now we see names together with gender score (percentage male). Explore From Here Add Some Measures We’d like to be able to see the count of attorneys and the percentage of those attorneys that were male over time. Summing the probabilities distinctly on the attorney name will give us those counts. 
Old LookML - measure: count_male_cases type: number sql: SUM(${exm_attorney_gender.percentage_male}) - measure: percentage_male_cases type: number sql: ${count_male_cases}/${count} value_format_name: percent_2 - measure: count_attornies type: count_distinct sql: ${exm_attorney_name} - measure: count_male_attornies type: sum_distinct sql: ${exm_attorney_gender.percentage_male} sql_distinct_key: ${exm_attorney_name} - measure: percentage_male_attornies type: number sql: ${count_male_attornies}/${count_attornies} value_format_name: percent_2 New LookML measure: count_male_cases { type: number sql: SUM(${exm_attorney_gender.percentage_male}) ;; } measure: percentage_male_cases { type: number sql: ${count_male_cases}/${count} ;; value_format_name: percent_2 } measure: count_attornies { type: count_distinct sql: ${exm_attorney_name} ;; } measure: count_male_attornies { type: sum_distinct sql: ${exm_attorney_gender.percentage_male} ;; sql_distinct_key: ${exm_attorney_name} ;; } measure: percentage_male_attornies { type: number sql: ${count_male_attornies}/${count_attornies} ;; value_format_name: percent_2 } Gender Mix Over Time I looks like in 1978 the USPTO Examiner staff was 2/3’s Male, and that those men handled close to 90% of the case load. 10 years later, in 1988, that balance had changed to 50%, with about 50% of the case load being male. Since then, the examiner staff has become predominately female —now only 40% male, with only 40% of the case load being handled by male attorneys. Explore From Here
headline
Fun with Data: Calculating Gender Balance Using First Names
keywords
position
#1
datePublished
interactionStatistic
_:vb829604 _:vb829605
Subject Item
_:vb829602
rdf:type
http://schema.org/Organization
pwdr:describedby
n6:2526
name
Community
Subject Item
_:vb829603
rdf:type
http://schema.org/Person
pwdr:describedby
n6:2526
name
lloydtabb
url
Subject Item
_:vb829604
rdf:type
http://schema.org/InteractionCounter
pwdr:describedby
n6:2526
interactionType
http://schema.org/LikeAction
userInteractionCount
0
Subject Item
_:vb829605
rdf:type
http://schema.org/InteractionCounter
pwdr:describedby
n6:2526
interactionType
http://schema.org/CommentAction
userInteractionCount
0
Subject Item
_:vb829606
rdf:type
http://schema.org/DiscussionForumPosting
publisher
_:vb829607
author
pwdr:describedby
n6:2526
dateModified
2016-11-08T19:35:08Z
mainEntityOfPage
n2:2526
articleBody
Edited to add in new LookML
headline
Fun with Data: Calculating Gender Balance Using First Names
position
#2
datePublished
interactionStatistic
_:vb829610 _:vb829609
Subject Item
_:vb829607
rdf:type
http://schema.org/Organization
pwdr:describedby
n6:2526
name
Community
Subject Item
_:vb829608
rdf:type
http://schema.org/Person
pwdr:describedby
n6:2526
name
daniel_nelson_looker
url
Subject Item
_:vb829609
rdf:type
http://schema.org/InteractionCounter
pwdr:describedby
n6:2526
interactionType
http://schema.org/LikeAction
userInteractionCount
0
Subject Item
_:vb829610
rdf:type
http://schema.org/InteractionCounter
pwdr:describedby
n6:2526
interactionType
http://schema.org/CommentAction
userInteractionCount
0
Subject Item
_:vb829611
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
Home
Subject Item
_:vb829612
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
Categories
Subject Item
_:vb829613
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
FAQ/Guidelines
Subject Item
_:vb829614
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
Terms of Service
Subject Item
_:vb829615
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
Privacy Policy
Subject Item
_:vb829616
rdf:type
prv:DataAccess
pwdr:describedby
n17:2526 n6:2526
prv:performedAt
2020-10-25T02:21:01.469691
prv:performedBy
n21:spongerInstance
prv:accessedService
n2:2526
Subject Item
_:vb829617
rdf:type
prv:DataAccess
pwdr:describedby
n17:2526 n6:2526
prv:performedAt
2020-10-25T02:21:01.469691
prv:performedBy
n21:spongerInstance
prv:accessedService
n2:2526
Subject Item
_:vb829620
rdf:type
http://schema.org/BreadcrumbList
pwdr:describedby
n6:2526
itemListElement
_:vb829621
Subject Item
_:vb829621
rdf:type
http://schema.org/ListItem
pwdr:describedby
n6:2526
position
1
item
n24:54
name
Data Explorers Guild
Subject Item
_:vb829622
rdf:type
http://schema.org/DiscussionForumPosting
publisher
_:vb829623
author
pwdr:describedby
n6:2526
dateModified
2019-01-16T19:07:57Z
mainEntityOfPage
n2:2526
articleBody
In the The Name Game: Step 3 we were able to compute, given a name, the percent likelihood that the name is male. The calculation is computed by dividing the size of the male population for a given name by the total population for that name. Some Gender Neutral Names Explore From Here Building a Name Map We can use the same table in BigQuery and this simple query to build a lookup table that maps names to their likelihood of being male. If we don’t have data (later when we join and the join fails), we assume a 50% probability. Old LookML - view: gender_guess derived_table: sql: | SELECT UPPER(name) AS name , FLOAT(SUM(CASE WHEN gender = 'M' THEN number ELSE 0 END)) / SUM(number) AS percentage_male FROM [fh-bigquery:popular_names.usa_1910_2013] GROUP EACH BY 1 fields: - dimension: name - dimension: percentage_male type: number sql: COALESCE(${TABLE}.percentage_male, 0.5) New LookML view: gender_guess { derived_table: { sql: SELECT UPPER(name) AS name , FLOAT(SUM(CASE WHEN gender = 'M' THEN number ELSE 0 END)) / SUM(number) AS percentage_male FROM [fh-bigquery:popular_names.usa_1910_2013] GROUP EACH BY 1 ;; } dimension: name {} dimension: percentage_male { type: number sql: COALESCE(${TABLE}.percentage_male, 0.5) ;; } } Names and likelihood they are Male The names with Percentage Male = 1 are certainly male, the names with 0 are certainly female. The fractional names are somewhere between. Explore From Here Names and the United States Patent and Trademark Office (USPTO) I recently uploaded all the USPTO data to BigQuery. The main table in this dataset is ‘case_files’ and on each case file, there is the name of the attorney assigned to the case. Attorney Names Attorneys and the number of cases they’ve worked on. Explore From Here Parsing out the First Name It appears that names are of the form <LAST_NAME>, <FIRST_NAME> <INITIAL OR NAME> We can parse the pretty easily using a regular expression. First names appear to immediately follow the comma. 
We can codify this with a new dimension and a regular expression. Old LookML - dimension: exm_attourney_first_name sql: REGEXP_EXTRACT(${exm_attorney_name}, `, (\\w+)`) New LookML dimension: exm_attourney_first_name { sql: REGEXP_EXTRACT(${exm_attorney_name}, `, (\\w+)`) ;; } And the results: Explore From Here Joining the Tables Big query doesn’t let us join on expressions, so we have to move the dimension in a derived table. BigQuery is smart enough to optimize this out if we don’t use the expression when referencing this derived table in a query. Old LookML - view: case_file derived_table: sql: | SELECT *, REGEXP_EXTRACT(exm_attorney_name, ', (\\w+)') as exm_attorney_first_name FROM trademark.case_file New LookML view: case_file { derived_table: { sql: SELECT *, REGEXP_EXTRACT(exm_attorney_name, ', (\\w+)') as exm_attorney_first_name FROM trademark.case_file ;; } } Next we join in gender_guess to the case file. Old LookML - explore: case_file joins: - join: exm_attorney_gender from: gender_guess sql_on: ${case_file.exm_attorney_first_name} = ${exm_attorney_gender.name} relationship: many_to_one New LookML explore: case_file { join: exm_attorney_gender { from: gender_guess sql_on: ${case_file.exm_attorney_first_name} = ${exm_attorney_gender.name} ;; relationship: many_to_one } } Now we see names together with gender score (percentage male). Explore From Here Add Some Measures We’d like to be able to see the count of attorneys and the percentage of those attorneys that were male over time. Summing the probabilities distinctly on the attorney name will give us those counts. 
Old LookML - measure: count_male_cases type: number sql: SUM(${exm_attorney_gender.percentage_male}) - measure: percentage_male_cases type: number sql: ${count_male_cases}/${count} value_format_name: percent_2 - measure: count_attornies type: count_distinct sql: ${exm_attorney_name} - measure: count_male_attornies type: sum_distinct sql: ${exm_attorney_gender.percentage_male} sql_distinct_key: ${exm_attorney_name} - measure: percentage_male_attornies type: number sql: ${count_male_attornies}/${count_attornies} value_format_name: percent_2 New LookML measure: count_male_cases { type: number sql: SUM(${exm_attorney_gender.percentage_male}) ;; } measure: percentage_male_cases { type: number sql: ${count_male_cases}/${count} ;; value_format_name: percent_2 } measure: count_attornies { type: count_distinct sql: ${exm_attorney_name} ;; } measure: count_male_attornies { type: sum_distinct sql: ${exm_attorney_gender.percentage_male} ;; sql_distinct_key: ${exm_attorney_name} ;; } measure: percentage_male_attornies { type: number sql: ${count_male_attornies}/${count_attornies} ;; value_format_name: percent_2 } Gender Mix Over Time I looks like in 1978 the USPTO Examiner staff was 2/3’s Male, and that those men handled close to 90% of the case load. 10 years later, in 1988, that balance had changed to 50%, with about 50% of the case load being male. Since then, the examiner staff has become predominately female —now only 40% male, with only 40% of the case load being handled by male attorneys. Explore From Here
headline
Fun with Data: Calculating Gender Balance Using First Names
keywords
position
#1
datePublished
interactionStatistic
_:vb829626 _:vb829625
Subject Item
_:vb829623
rdf:type
http://schema.org/Organization
pwdr:describedby
n6:2526
name
Community
Subject Item
_:vb829624
rdf:type
http://schema.org/Person
pwdr:describedby
n6:2526
name
lloydtabb
url
Subject Item
_:vb829625
rdf:type
http://schema.org/InteractionCounter
pwdr:describedby
n6:2526
interactionType
http://schema.org/LikeAction
userInteractionCount
0
Subject Item
_:vb829626
rdf:type
http://schema.org/InteractionCounter
pwdr:describedby
n6:2526
interactionType
http://schema.org/CommentAction
userInteractionCount
0
Subject Item
_:vb829627
rdf:type
http://schema.org/DiscussionForumPosting
publisher
_:vb829628
author
pwdr:describedby
n6:2526
dateModified
2016-11-08T19:35:08Z
mainEntityOfPage
n2:2526
articleBody
Edited to add in new LookML
headline
Fun with Data: Calculating Gender Balance Using First Names
position
#2
datePublished
interactionStatistic
_:vb829630 _:vb829631
Subject Item
_:vb829628
rdf:type
http://schema.org/Organization
pwdr:describedby
n6:2526
name
Community
Subject Item
_:vb829629
rdf:type
http://schema.org/Person
pwdr:describedby
n6:2526
name
daniel_nelson_looker
url
Subject Item
_:vb829630
rdf:type
http://schema.org/InteractionCounter
pwdr:describedby
n6:2526
interactionType
http://schema.org/LikeAction
userInteractionCount
0
Subject Item
_:vb829631
rdf:type
http://schema.org/InteractionCounter
pwdr:describedby
n6:2526
interactionType
http://schema.org/CommentAction
userInteractionCount
0
Subject Item
_:vb829632
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
Home
Subject Item
_:vb829633
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
Categories
Subject Item
_:vb829634
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
FAQ/Guidelines
Subject Item
_:vb829635
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
Terms of Service
Subject Item
_:vb829636
rdf:type
http://schema.org/SiteNavigationElement
pwdr:describedby
n6:2526
name
Privacy Policy