To Nha Notes | Aug. 15, 2022, 2:42 p.m.
Sample Parquet data file (cities.parquet)
The following is a representative row in the sample Parquet file:
{
"continent": "Europe",
"country": {
"city": [
"Paris",
"Nice",
"Marseilles",
"Cannes"
],
"name": "France"
}
}
/* Create a target relational table for the Parquet data. The table is temporary, meaning it persists only */ /* for the duration of the user session and is not visible to other users. */ create or replace temporary table cities ( continent varchar default null, country varchar default null, city variant default null ); /* Create a file format object that specifies the Parquet file format type. */ /* Accepts the default values for other options. */ create or replace file format sf_tut_parquet_format type = 'parquet'; /* Create a temporary internal stage that references the file format object. */ /* Similar to temporary tables, temporary stages are automatically dropped at the end of the session. */ create or replace temporary stage sf_tut_stage file_format = sf_tut_parquet_format; /* Stage the data file. */ /* */ /* Note that the example PUT statement references the macOS or Linux location of the data file. */ /* If you are using Windows, execute the following statement instead: */ -- put file://%TEMP%/cities.parquet @sf_tut_stage; put file:///tmp/cities.parquet @sf_tut_stage; /* Load the Parquet data into the relational table. */ /* */ /* A SELECT query in the COPY statement identifies a numbered set of columns in the data files you are */ /* loading from. Note that all Parquet data is stored in a single column ($1). */ /* */ /* Cast element values to the target column data type. */ copy into cities from (select $1:continent::varchar, $1:country:name::varchar, $1:country:city::variant from @sf_tut_stage/cities.parquet); /* Query the relational table */ select * from cities;
/* Unload the CITIES table columns into a Parquet file. Optionally flatten the CITY column array and unload */ /* the child elements to a separate column. */ /* */ /* To retain the column names in the output file, use the HEADER = TRUE copy option. */ copy into @sf_tut_stage/out/parquet_ from ( select continent , country , c.value::string as city from cities , lateral flatten(input => city) c) file_format = (type = 'parquet') header = true; /* Query the staged Parquet file. */ select t.$1 from @sf_tut_stage/out/ t;