-
Notifications
You must be signed in to change notification settings - Fork 0
/
Chinese AI ecosystem
311 lines (311 loc) · 7.75 KB
/
Chinese AI ecosystem
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
#Venture capital investment
WITH
# table with basic company information
comp_stage AS (
SELECT
* EXCEPT(short_description,
description),
CONCAT(coalesce(short_description, ''),' ', coalesce(description, '')) AS description
FROM (
SELECT
uuid,
name AS target_name,
cb_url,
country_code AS target_country,
short_description,
category_list,
category_groups_list
FROM
gcp_cset_crunchbase.organizations )
LEFT JOIN (
SELECT
uuid,
description
FROM
gcp_cset_crunchbase.organization_descriptions)
USING
(uuid) ),
add_ipo_tab AS (
SELECT
uuid,
target_name,
cb_url,
target_country,
category_list,
category_groups_list,
IF
(ipo IS NULL,
0,
ipo) AS ipo,
shared_functions.isAICompany(description) AS ai_company,
IF
(category_groups_list LIKE '%Artificial Intelligence%',
TRUE,
FALSE) AS ai_category_cb,
FROM
comp_stage
LEFT JOIN (
SELECT
1 AS ipo,
org_uuid AS uuid
FROM
gcp_cset_crunchbase.ipos)
USING
(uuid) ),
# VC funding rounds target-investor pairs
funding_round_stage AS (
SELECT
target_uuid,
investor_uuid,
investor_name,
org_name as target_name,
COUNT(investor_uuid) OVER(PARTITION BY funding_round) AS n_investors,
raised_amount_usd AS investment_value,
investment_type,
funding_round,
EXTRACT(YEAR
FROM
announced_on) AS year
FROM (
SELECT
funding_round_uuid AS funding_round,
investor_uuid,
investor_name
FROM
gcp_cset_crunchbase.investments)
INNER JOIN (
SELECT
uuid AS funding_round,
org_uuid AS target_uuid,
org_name,
raised_amount_usd,
announced_on,
investment_type
FROM
gcp_cset_crunchbase.funding_rounds)
USING
(funding_round) ),
# add MA target-investor pairs
# add investor country to staging round
funding_round AS (
SELECT
*
FROM
funding_round_stage
LEFT JOIN (
SELECT
uuid AS investor_uuid,
country_code AS investor_country
FROM
gcp_cset_crunchbase.investors)
USING
(investor_uuid) ),
add_mergers AS (
SELECT
target_uuid,
investor_uuid,
* EXCEPT(target_uuid,
investor_uuid)
FROM
funding_round
UNION ALL (
SELECT
acquiree_uuid AS target_uuid,
acquirer_uuid AS investor_uuid,
acquirer_name as investor_name,
acquiree_name AS target_name,
1 AS n_investors,
price_usd AS investment_value,
'merge' AS investment_type,
CONCAT( acquirer_uuid,acquiree_uuid) AS funding_round,
EXTRACT(YEAR
FROM
acquired_on) AS year,
acquirer_country_code AS investor_target_country
FROM
gcp_cset_crunchbase.acquisitions) ),
-- /* add ai companies to investments */
add_ai_to_inv AS (
SELECT
* EXCEPT (target_name),
COALESCE (a.target_name, b.target_name) as target_name
FROM
add_mergers a
FULL JOIN
add_ipo_tab b
ON
a.target_uuid = b.uuid),
/* where ai_category_cb is True OR ai_company is True */ distinct_rounds AS (
SELECT
DISTINCT target_uuid,
funding_round,
investor_uuid,
investment_value,
IFNULL(investment_type,
'missing') AS investment_type,
uuid,
target_name,
investor_name,
cb_url,
IFNULL(target_country,
'missing') AS target_country,
category_list,
category_groups_list,
IFNULL(ipo,
0) AS ipo,
IFNULL(ai_company,
FALSE) AS ai_company,
ai_category_cb,
IFNULL(investor_country,
'missing') AS investor_country,
year
FROM
add_ai_to_inv ),
imp_step1 AS (
SELECT
* EXCEPT(imputed_investment_value_step),
IFNULL(investment_value,
imputed_investment_value_step) AS imputed_investment_value
FROM (
SELECT
*,
PERCENTILE_CONT(investment_value,
0.5) OVER(PARTITION BY investment_type, ai_company, ipo, target_country, year) AS imputed_investment_value_step
FROM
distinct_rounds ) ),
imp_step2 AS (
SELECT
* EXCEPT(imputed_investment_value,
imputed_investment_value_step),
IFNULL(imputed_investment_value,
imputed_investment_value_step) AS imputed_investment_value
FROM (
SELECT
*,
PERCENTILE_CONT(investment_value,
0.5) OVER(PARTITION BY investment_type, ai_company, target_country, year) AS imputed_investment_value_step
FROM
imp_step1 ) ),
imp_step3 AS (
SELECT
* EXCEPT(imputed_investment_value,
imputed_investment_value_step),
IFNULL(imputed_investment_value,
imputed_investment_value_step) AS imputed_investment_value
FROM (
SELECT
*,
PERCENTILE_CONT(investment_value,
0.5) OVER(PARTITION BY investment_type, ai_company, target_country) AS imputed_investment_value_step
FROM
imp_step2 ) ),
imp_step4 AS (
SELECT
* EXCEPT(imputed_investment_value,
imputed_investment_value_step),
IFNULL(imputed_investment_value,
imputed_investment_value_step) AS imputed_investment_value
FROM (
SELECT
*,
PERCENTILE_CONT(investment_value,
0.5) OVER(PARTITION BY ai_company, target_country) AS imputed_investment_value_step
FROM
imp_step3 ) ),
imp_step5 AS (
SELECT
* EXCEPT(imputed_investment_value,
imputed_investment_value_step),
IFNULL(imputed_investment_value,
imputed_investment_value_step) AS imputed_investment_value
FROM (
SELECT
*,
PERCENTILE_CONT(investment_value,
0.5) OVER(PARTITION BY ai_company) AS imputed_investment_value_step
FROM
imp_step4 ) ),
imp_step6 AS (
SELECT
* EXCEPT(imputed_investment_value,
imputed_investment_value_step),
IFNULL(imputed_investment_value,
imputed_investment_value_step) AS imputed_investment_value
FROM (
SELECT
*,
PERCENTILE_CONT(investment_value,
0.5) OVER() AS imputed_investment_value_step
FROM
imp_step5 ) ),
#make outbound AI table
outbound_ai AS (
SELECT
DISTINCT
*
FROM
imp_step6
WHERE
(ai_company IS TRUE
OR ai_category_cb IS TRUE)
AND (target_country = "CHN" or target_country = "HKG")
AND (year >= 2015)
AND (year <= 2021)
AND ( investment_type != "missing"
AND investment_type != "debt_financing"
AND investment_type != "equity_crowdfunding"
AND investment_type != "grant"
AND investment_type != "non_equity_assistance"
AND investment_type != "post_ipo_debt"
AND investment_type != "product_crowdfunding"
AND investment_type != "post_ipo_debt")),
#where investor's country code is missing, replace with people's org's country code
people_tab AS (
SELECT
* EXCEPT (investor_country),
COALESCE(CASE WHEN investor_country = "missing" THEN NULL ELSE investor_country END,
org.country_code) AS investor_country,
FROM
outbound_ai
LEFT JOIN
`gcp-cset-projects.gcp_cset_crunchbase.people` AS people
ON
investor_uuid = people.uuid
LEFT JOIN
`gcp-cset-projects.gcp_cset_crunchbase.organizations` AS org
ON
people.featured_job_organization_uuid = org.uuid),
combine AS(
# combine HKG with CHN
SELECT
* EXCEPT(investor_country,
target_country),
IF
(investor_country = "HKG",
"CHN",
investor_country) AS investor_country,
IF
(target_country = "HKG",
"CHN",
target_country) AS target_country
FROM
people_tab)
SELECT
DISTINCT investment_type,
target_name,
string_agg (investor_name, "; ") as investor_names,
year,
imputed_investment_value,
string_agg (investor_country, "; ") as investor_countries,
target_country
FROM
combine
where investor_country is not null
group by
investment_type,
target_name,
year,
imputed_investment_value,
target_country
order by
target_name