Added
- `luau`: qsv_register_lookup helper function now works with CSVs on URLs #860
- `luau`: added support for "dathere://" lookup scheme, allowing users to conveniently load oft-used lookup tables from https://github.com/dathere/qsv-lookup-tables #861
- `luau`: added detailed API definitions for Luau Helper Functions (lines 1156 to 1497 in 605b38b):
// ----------------------------------------------------------------------------- // HELPER FUNCTIONS // ----------------------------------------------------------------------------- // setup_helpers sets up some helper functions that can be called from Luau scripts fn setup_helpers(luau: &Lua, delimiter: Option<Delimiter>) -> Result<(), CliError> { // this is a helper function that can be called from Luau scripts // to send log messages to the logfile // the first parameter is the log level, and the following parameters are concatenated // // qsv_log(log_level, arg1, .., argN) // log_level: string, one of "info", "warn", "error", "debug", "trace". // if invalid log_level is provided, "info" is assumed. // arg1, argN: Up to 255 arguments to be concatenated and logged as one string. // returns: Luau table of header names excluding the first header, // or Luau runtime error if the lookup table could not be loaded // let qsv_log = luau.create_function(|luau, mut args: mlua::MultiValue| { let mut log_msg = { // at which stage are we logging? 
match LUAU_STAGE.load(Ordering::Relaxed) { BEGIN_STAGE => "BEGIN: ".to_string(), MAIN_STAGE => "MAIN: ".to_string(), END_STAGE => "END: ".to_string(), _ => String::new(), } }; let mut idx = 0_u8; let mut log_level = String::new(); while let Some(val) = args.pop_front() { let val = luau.from_value::<serde_json::Value>(val)?; let val_str = &serde_json::to_string_pretty(&val).unwrap_or_default(); if idx == 0 { log_level = val_str.trim_matches('"').to_lowercase(); } else { log_msg.push_str(val_str.trim_matches('"')); if idx == u8::MAX { break; } } idx += 1; } match log_level.as_str() { "info" => log::info!("{log_msg}"), "warn" => log::warn!("{log_msg}"), "error" => log::error!("{log_msg}"), "debug" => log::debug!("{log_msg}"), "trace" => log::trace!("{log_msg}"), _ => { log::info!("unknown log level: {log_level} msg: {log_msg}"); } } Ok(()) })?; luau.globals().set("qsv_log", qsv_log)?; // this is a helper function that can be called from Luau scripts // to coalesce - return the first non-null value in a list // // qsv_coalesce(arg1, .., argN) // returns: first non-null value of the arguments // or an empty string if all arguments are null // let qsv_coalesce = luau.create_function(|luau, mut args: mlua::MultiValue| { while let Some(val) = args.pop_front() { let val = luau.from_value::<serde_json::Value>(val)?; let val_str = val.as_str().unwrap_or_default(); if !val_str.is_empty() { return Ok(val_str.to_string()); } } Ok(String::new()) })?; luau.globals().set("qsv_coalesce", qsv_coalesce)?; // this is a helper function that can be called from the BEGIN and MAIN script // to stop processing. All the parameters are concatenated and returned as a string. // The string is also stored in the global variable _QSV_BREAK_MSG. // qsv_break should only be called from scripts that are processing CSVs in sequential mode. 
// When in random access mode, set _INDEX to -1 or a value greater than _LASTROW instead // // qsv_break(arg1, .., argN) // arg1, argN: up to 254 arguments // returns: concatenated args as one string or an empty string if no args are passed. // Luau runtime error if called from END script // let qsv_break = luau.create_function(|luau, mut args: mlua::MultiValue| { if LUAU_STAGE.load(Ordering::Relaxed) == END_STAGE { return Err(mlua::Error::RuntimeError( "qsv_break() can only be called from the BEGIN and MAIN scripts.".to_string(), )); } let mut break_msg = String::new(); let mut idx = 0_u8; while let Some(val) = args.pop_front() { let val = luau.from_value::<serde_json::Value>(val)?; let val_str = &serde_json::to_string_pretty(&val).unwrap_or_default(); break_msg.push_str(val_str.trim_matches('"')); if idx == u8::MAX { break; } idx += 1; } luau.globals().set("_QSV_BREAK_MSG", break_msg.clone())?; QSV_BREAK.store(true, Ordering::Relaxed); Ok(break_msg) })?; luau.globals().set("qsv_break", qsv_break)?; // this is a helper function that can be called from the MAIN script // to SKIP writing the output of that row. // // qsv_skip() // returns: None // or Luau runtime error if called from BEGIN or END scripts // let qsv_skip = luau.create_function(|_, ()| { if LUAU_STAGE.load(Ordering::Relaxed) != MAIN_STAGE { return Err(mlua::Error::RuntimeError( "qsv_skip() can only be called from the MAIN script.".to_string(), )); } QSV_SKIP.store(true, Ordering::Relaxed); Ok(()) })?; luau.globals().set("qsv_skip", qsv_skip)?; // this is a helper function that creates an index file for the current CSV. // It does not work for stdin and should only be called in the BEGIN script // its actually just a stub and the real function is called before processing // the BEGIN script. // Calling this will also initialize the _ROWCOUNT and _LASTROW special variables // so that the BEGIN script can use them // // qsv_autoindex() // returns: None as this is a stub function. 
// A Luau runtime error will be raised if the index cannot be created // as soon as the BEGIN script is actually executed. // A Luau runtime error is also returned if called from MAIN or END. // let qsv_autoindex = luau.create_function(|_, ()| { if LUAU_STAGE.load(Ordering::Relaxed) != BEGIN_STAGE { return Err(mlua::Error::RuntimeError( "qsv_autoindex() can only be called from the BEGIN script.".to_string(), )); } Ok(()) })?; luau.globals().set("qsv_autoindex", qsv_autoindex)?; // this is a helper function that can be called from the BEGIN, MAIN & END scripts to insert a // record It will automatically ignore excess columns, and fill up columns with // empty strings if there are less columns specified than expected. // Note that you can only insert ONE record in the BEGIN and END scripts // // qsv_insertrecord(col1, .., colN) // col1..N: the values to insert. If there are more columns than expected, the extra // columns will be ignored. If there are less columns than expected, the // missing columns will be filled with empty strings. // Up to 65,535 columns supported. // returns: None. Will always succeed. // let qsv_insertrecord = luau.create_function(|luau, mut args: mlua::MultiValue| { let args_len = args.len().try_into().unwrap_or(10_i32); let insertrecord_table = luau.create_table_with_capacity(args_len, 1)?; // Luau tables are 1-based let mut idx = 1_u16; while let Some(val) = args.pop_front() { let val = luau.from_value::<serde_json::Value>(val)?; let val_str = val.as_str().unwrap_or_default(); insertrecord_table.set(idx, val_str).unwrap(); idx += 1; if idx == u16::MAX { break; } } luau.globals() .set("_QSV_INSERTRECORD_TBL", insertrecord_table.clone())?; Ok(()) })?; luau.globals().set("qsv_insertrecord", qsv_insertrecord)?; // this is a helper function that can be called from the BEGIN script to register // and load a lookup table. It expects two arguments - the lookup_name & the // lookup_table_uri - the URI of the CSV to use as a lookup table. 
// It returns a table with the header names if successful and create a Luau table // named using lookup_name, storing all the lookup values. // The first column is the key and the rest of the columns are values stored in a // table indexed by column name. // // qsv_register(lookup_name, lookup_table_uri) // lookup_name: The name of the Luau table to load the CSV into // lookup_table_uri: The name of the CSV file to load. Note that it will use // the luau --delimiter option if specified. // This can be a file on the filesystem or on at a URL // ("http", "https" and "dathere" schemes supported). // The dathere scheme is used to access lookup-ready CSVs // on https://github.com/dathere/qsv-lookup-tables. // returns: Luau table of header names excluding the first header, // or Luau runtime error if the CSV could not be loaded // let qsv_register_lookup = luau.create_function(move |luau, mut args: mlua::MultiValue| { let args_len = args.len().try_into().unwrap_or(10_i32); if LUAU_STAGE.load(Ordering::Relaxed) != BEGIN_STAGE { return Err(mlua::Error::RuntimeError( "qsv_register_lookup() can only be called from the BEGIN script.".to_string(), )); } if args_len != 2 { return Err(mlua::Error::RuntimeError( "qsv_register_lookup() requires two arguments - lookup_name & lookup_table_uri" .to_string(), )); } let lookup_name = luau.from_value::<serde_json::Value>(args.pop_front().unwrap())?; let lookup_name_str = lookup_name.as_str().unwrap_or_default(); let lookup_table_uri = luau.from_value::<serde_json::Value>(args.pop_front().unwrap())?; let mut lookup_table_uri_string = lookup_table_uri.as_str().unwrap_or_default().to_string(); // if the lookup_table_uri starts with "dathere://", prepend the repo URL to the lookup table if let Some(lookup_url) = lookup_table_uri_string.strip_prefix("dathere://") { lookup_table_uri_string = format!("https://raw.githubusercontent.com/dathere/qsv-lookup-tables/main/lookup-tables/{lookup_url}"); } let lookup_on_url = 
lookup_table_uri_string.to_lowercase().starts_with("http"); // if lookup_on_url, create a temporary file and download CSV to it. // We do this outside the download proper below as the tempdir // needs to persist until the end of this helper function, when // it will be automatically deleted let mut temp_file = tempfile::NamedTempFile::new()?; if lookup_on_url { use reqwest::blocking::Client; let client_timeout = std::time::Duration::from_secs(TIMEOUT_SECS.load(Ordering::Relaxed) as u64); let client = match Client::builder() .user_agent(util::DEFAULT_USER_AGENT) .brotli(true) .gzip(true) .deflate(true) .use_rustls_tls() .http2_adaptive_window(true) .connection_verbose(log_enabled!(log::Level::Debug) || log_enabled!(log::Level::Trace)) .timeout(client_timeout) .build() { Ok(c) => c, Err(e) => { return Err(mlua::Error::RuntimeError(format!( "Cannot build reqwest client to download lookup CSV: {e}." ))); } }; let lookup_csv_contents = match client.get(lookup_table_uri_string).send() { Ok(response) => response.text().unwrap_or_default(), Err(e) => { return Err(mlua::Error::RuntimeError(format!( "Cannot read lookup CSV at url: {e}." 
))); } }; temp_file.write_all(lookup_csv_contents.as_bytes())?; // we need to persist the tempfile so that we can pass the path to the CSV reader let (_lookup_file, lookup_file_path) = temp_file.keep().expect("Cannot persist tempfile"); lookup_table_uri_string = lookup_file_path.to_str().unwrap_or_default().to_string(); } let lookup_table = luau.create_table()?; #[allow(unused_assignments)] let mut record = csv::StringRecord::new(); let conf = Config::new(&Some(lookup_table_uri_string.clone())) .delimiter(delimiter) .no_headers(false); let mut rdr = conf.reader()?; let headers = match rdr.headers() { Ok(headers) => headers.clone(), Err(e) => { return Err(mlua::Error::RuntimeError(format!( "qsv_register_lookup() cannot read headers of lookup table: {e}" ))); } }; for result in rdr.records() { record = result.unwrap_or_default(); let key = record.get(0).unwrap_or_default().trim(); let inside_table = luau.create_table()?; for (i, header) in headers.iter().enumerate() { if i > 0 { let val = record.get(i).unwrap_or_default().trim(); inside_table.raw_set(header, val)?; } } lookup_table.raw_set(key, inside_table)?; } // if we downloaded the CSV to a temp file, we need to delete it if lookup_on_url { fs::remove_file(lookup_table_uri_string)?; } luau.globals() .raw_set(lookup_name_str, lookup_table.clone())?; // now that we've successfully loaded the lookup table, we return the headers // as a table so the user can use them to access the values let headers_table = luau.create_table()?; for (i, header) in headers.iter().enumerate() { // we do not include the first column, which is the key if i > 0 { headers_table.raw_set(i, header)?; } } Ok(headers_table) })?; luau.globals() .set("qsv_register_lookup", qsv_register_lookup)?; Ok(()) } validate
: added --timeout option when downloading JSON Schemas 605b38b
Changed
- remove all glob imports #857 and #858
- qsvdp (Datapusher+-optimized qsv binary variant) now has an embedded `luau` interpreter #859
- `validate`: JSON Schema url now case-insensitive 3123dc6
- Bump serde from 1.0.155 to 1.0.156 by @dependabot in #862
- applied select clippy lint recommendations
- cargo update bump several indirect dependencies
- pin Rust nightly to 2023-03-14