github jqnatividad/qsv 0.93.0

latest releases: 0.138.0, 0.137.0, 0.136.0...
20 months ago

Added

  • luau: qsv_register_lookup helper function now works with CSVs on URLs #860
  • luau: added support for "dathere://" lookup scheme, allowing users to conveniently load oft-used lookup tables from https://github.com/dathere/qsv-lookup-tables #861
  • luau: added detailed API definitions for Luau Helper Functions

    qsv/src/cmd/luau.rs

    Lines 1156 to 1497 in 605b38b

    // -----------------------------------------------------------------------------
    // HELPER FUNCTIONS
    // -----------------------------------------------------------------------------
    // setup_helpers sets up some helper functions that can be called from Luau scripts
    /// Registers the qsv helper functions as globals on the given Luau instance so
    /// that they can be called from Luau scripts.
    ///
    /// * `luau` - the Luau interpreter to register the helper functions on.
    /// * `delimiter` - the delimiter passed to the CSV reader that
    ///   `qsv_register_lookup` uses to load lookup tables.
    ///
    /// Returns `Ok(())` once all helpers are registered. Any failure to create or
    /// register a helper is propagated to the caller as a `CliError`.
    fn setup_helpers(luau: &Lua, delimiter: Option<Delimiter>) -> Result<(), CliError> {
        // this is a helper function that can be called from Luau scripts
        // to send log messages to the logfile
        // the first parameter is the log level, and the following parameters are concatenated
        //
        //   qsv_log(log_level, arg1, .., argN)
        //       log_level: string, one of "info", "warn", "error", "debug", "trace".
        //                  if invalid log_level is provided, the message is logged at the
        //                  "info" level, prefixed with the unknown log level.
        //      arg1, argN: Up to 255 arguments to be concatenated and logged as one string.
        //                  The message is prefixed with the current stage (BEGIN, MAIN or END).
        //         returns: None
        //
        let qsv_log = luau.create_function(|luau, mut args: mlua::MultiValue| {
            // at which stage are we logging?
            let mut log_msg = match LUAU_STAGE.load(Ordering::Relaxed) {
                BEGIN_STAGE => "BEGIN: ".to_string(),
                MAIN_STAGE => "MAIN: ".to_string(),
                END_STAGE => "END: ".to_string(),
                _ => String::new(),
            };
            let mut idx = 0_u8;
            let mut log_level = String::new();
            while let Some(val) = args.pop_front() {
                let val = luau.from_value::<serde_json::Value>(val)?;
                let val_str = &serde_json::to_string_pretty(&val).unwrap_or_default();
                if idx == 0 {
                    // the first argument is the log level, not part of the message
                    log_level = val_str.trim_matches('"').to_lowercase();
                } else {
                    log_msg.push_str(val_str.trim_matches('"'));
                    // stop before idx can overflow the u8 counter
                    if idx == u8::MAX {
                        break;
                    }
                }
                idx += 1;
            }
            match log_level.as_str() {
                "info" => log::info!("{log_msg}"),
                "warn" => log::warn!("{log_msg}"),
                "error" => log::error!("{log_msg}"),
                "debug" => log::debug!("{log_msg}"),
                "trace" => log::trace!("{log_msg}"),
                _ => {
                    log::info!("unknown log level: {log_level} msg: {log_msg}");
                }
            }
            Ok(())
        })?;
        luau.globals().set("qsv_log", qsv_log)?;

        // this is a helper function that can be called from Luau scripts
        // to coalesce - return the first non-empty string value in a list
        //
        //   qsv_coalesce(arg1, .., argN)
        //      returns: the first argument that is a non-empty string,
        //               or an empty string if there is none.
        //               Note that arguments that are not strings are skipped.
        //
        let qsv_coalesce = luau.create_function(|luau, mut args: mlua::MultiValue| {
            while let Some(val) = args.pop_front() {
                let val = luau.from_value::<serde_json::Value>(val)?;
                // as_str() is None for anything that is not a string,
                // so those values are treated as empty
                let val_str = val.as_str().unwrap_or_default();
                if !val_str.is_empty() {
                    return Ok(val_str.to_string());
                }
            }
            Ok(String::new())
        })?;
        luau.globals().set("qsv_coalesce", qsv_coalesce)?;

        // this is a helper function that can be called from the BEGIN and MAIN script
        // to stop processing. All the parameters are concatenated and returned as a string.
        // The string is also stored in the global variable _QSV_BREAK_MSG.
        // qsv_break should only be called from scripts that are processing CSVs in sequential mode.
        // When in random access mode, set _INDEX to -1 or a value greater than _LASTROW instead
        //
        //   qsv_break(arg1, .., argN)
        //      arg1, argN: up to 256 arguments
        //         returns: concatenated args as one string or an empty string if no args are passed.
        //                  Luau runtime error if called from END script
        //
        let qsv_break = luau.create_function(|luau, mut args: mlua::MultiValue| {
            if LUAU_STAGE.load(Ordering::Relaxed) == END_STAGE {
                return Err(mlua::Error::RuntimeError(
                    "qsv_break() can only be called from the BEGIN and MAIN scripts.".to_string(),
                ));
            }
            let mut break_msg = String::new();
            let mut idx = 0_u8;
            while let Some(val) = args.pop_front() {
                let val = luau.from_value::<serde_json::Value>(val)?;
                let val_str = &serde_json::to_string_pretty(&val).unwrap_or_default();
                break_msg.push_str(val_str.trim_matches('"'));
                // stop before idx can overflow the u8 counter
                if idx == u8::MAX {
                    break;
                }
                idx += 1;
            }
            // the clone is required as the message is both stored and returned
            luau.globals().set("_QSV_BREAK_MSG", break_msg.clone())?;
            QSV_BREAK.store(true, Ordering::Relaxed);
            Ok(break_msg)
        })?;
        luau.globals().set("qsv_break", qsv_break)?;

        // this is a helper function that can be called from the MAIN script
        // to SKIP writing the output of that row.
        //
        //   qsv_skip()
        //      returns: None
        //               or Luau runtime error if called from BEGIN or END scripts
        //
        let qsv_skip = luau.create_function(|_, ()| {
            if LUAU_STAGE.load(Ordering::Relaxed) != MAIN_STAGE {
                return Err(mlua::Error::RuntimeError(
                    "qsv_skip() can only be called from the MAIN script.".to_string(),
                ));
            }
            QSV_SKIP.store(true, Ordering::Relaxed);
            Ok(())
        })?;
        luau.globals().set("qsv_skip", qsv_skip)?;

        // this is a helper function that creates an index file for the current CSV.
        // It does not work for stdin and should only be called in the BEGIN script
        // its actually just a stub and the real function is called before processing
        // the BEGIN script.
        // Calling this will also initialize the _ROWCOUNT and _LASTROW special variables
        // so that the BEGIN script can use them
        //
        //   qsv_autoindex()
        //      returns: None as this is a stub function.
        //               A Luau runtime error will be raised if the index cannot be created
        //               as soon as the BEGIN script is actually executed.
        //               A Luau runtime error is also returned if called from MAIN or END.
        //
        let qsv_autoindex = luau.create_function(|_, ()| {
            if LUAU_STAGE.load(Ordering::Relaxed) != BEGIN_STAGE {
                return Err(mlua::Error::RuntimeError(
                    "qsv_autoindex() can only be called from the BEGIN script.".to_string(),
                ));
            }
            Ok(())
        })?;
        luau.globals().set("qsv_autoindex", qsv_autoindex)?;

        // this is a helper function that can be called from the BEGIN, MAIN & END scripts to
        // insert a record. It will automatically ignore excess columns, and fill up columns with
        // empty strings if there are less columns specified than expected.
        // Note that you can only insert ONE record in the BEGIN and END scripts
        //
        //   qsv_insertrecord(col1, .., colN)
        //      col1..N: the values to insert. If there are more columns than expected, the extra
        //               columns will be ignored. If there are less columns than expected, the
        //               missing columns will be filled with empty strings.
        //               Values that are not strings are stored as empty strings.
        //               Up to 65,534 columns supported.
        //      returns: None, or a Luau runtime error if the record table cannot be populated.
        //
        let qsv_insertrecord = luau.create_function(|luau, mut args: mlua::MultiValue| {
            let args_len = args.len().try_into().unwrap_or(10_i32);
            let insertrecord_table = luau.create_table_with_capacity(args_len, 1)?;
            // Luau tables are 1-based
            let mut idx = 1_u16;

            while let Some(val) = args.pop_front() {
                let val = luau.from_value::<serde_json::Value>(val)?;
                let val_str = val.as_str().unwrap_or_default();

                // propagate a failed insert as a Luau error instead of panicking
                insertrecord_table.set(idx, val_str)?;
                idx += 1;

                // stop before idx can overflow the u16 counter
                if idx == u16::MAX {
                    break;
                }
            }
            luau.globals()
                .set("_QSV_INSERTRECORD_TBL", insertrecord_table)?;

            Ok(())
        })?;
        luau.globals().set("qsv_insertrecord", qsv_insertrecord)?;

        // this is a helper function that can be called from the BEGIN script to register
        // and load a lookup table. It expects two arguments - the lookup_name & the
        // lookup_table_uri - the URI of the CSV to use as a lookup table.
        // It returns a table with the header names if successful and create a Luau table
        // named using lookup_name, storing all the lookup values.
        // The first column is the key and the rest of the columns are values stored in a
        // table indexed by column name.
        //
        //   qsv_register_lookup(lookup_name, lookup_table_uri)
        //            lookup_name: The name of the Luau table to load the CSV into
        //       lookup_table_uri: The name of the CSV file to load. Note that it will use
        //                         the luau --delimiter option if specified.
        //                         This can be a file on the filesystem or at a URL
        //                         ("http", "https" and "dathere" schemes supported).
        //                         The dathere scheme is used to access lookup-ready CSVs
        //                         on https://github.com/dathere/qsv-lookup-tables.
        //                returns: Luau table of header names excluding the first header,
        //                         or Luau runtime error if the CSV could not be loaded
        //
        let qsv_register_lookup = luau.create_function(move |luau, mut args: mlua::MultiValue| {
            let args_len = args.len().try_into().unwrap_or(10_i32);

            if LUAU_STAGE.load(Ordering::Relaxed) != BEGIN_STAGE {
                return Err(mlua::Error::RuntimeError(
                    "qsv_register_lookup() can only be called from the BEGIN script.".to_string(),
                ));
            }

            if args_len != 2 {
                return Err(mlua::Error::RuntimeError(
                    "qsv_register_lookup() requires two arguments - lookup_name & lookup_table_uri"
                        .to_string(),
                ));
            }

            // the two unwraps below cannot fail as we just checked that there are two arguments
            let lookup_name = luau.from_value::<serde_json::Value>(args.pop_front().unwrap())?;
            let lookup_name_str = lookup_name.as_str().unwrap_or_default();
            let lookup_table_uri = luau.from_value::<serde_json::Value>(args.pop_front().unwrap())?;
            let mut lookup_table_uri_string =
                lookup_table_uri.as_str().unwrap_or_default().to_string();

            // if the lookup_table_uri starts with "dathere://", prepend the repo URL to the lookup table
            if let Some(lookup_url) = lookup_table_uri_string.strip_prefix("dathere://") {
                lookup_table_uri_string = format!("https://raw.githubusercontent.com/dathere/qsv-lookup-tables/main/lookup-tables/{lookup_url}");
            }

            // only treat the URI as a URL if it has a complete http(s) scheme, so that
            // local files whose names merely start with "http" are still read from disk
            let lowercase_uri = lookup_table_uri_string.to_lowercase();
            let lookup_on_url =
                lowercase_uri.starts_with("http://") || lowercase_uri.starts_with("https://");

            // if lookup_on_url, download the CSV to a temporary file.
            // The returned TempPath guard deletes the file when it is dropped,
            // so the download is also cleaned up if we bail out early with an error
            let downloaded_csv = if lookup_on_url {
                use reqwest::blocking::Client;

                let client_timeout =
                    std::time::Duration::from_secs(TIMEOUT_SECS.load(Ordering::Relaxed) as u64);

                let client = Client::builder()
                    .user_agent(util::DEFAULT_USER_AGENT)
                    .brotli(true)
                    .gzip(true)
                    .deflate(true)
                    .use_rustls_tls()
                    .http2_adaptive_window(true)
                    .connection_verbose(
                        log_enabled!(log::Level::Debug) || log_enabled!(log::Level::Trace),
                    )
                    .timeout(client_timeout)
                    .build()
                    .map_err(|e| {
                        mlua::Error::RuntimeError(format!(
                            "Cannot build reqwest client to download lookup CSV: {e}."
                        ))
                    })?;

                // fail on connection errors, HTTP error statuses and unreadable bodies
                // instead of silently loading an empty or bogus lookup table
                let lookup_csv_contents = client
                    .get(lookup_table_uri_string.as_str())
                    .send()
                    .and_then(|response| response.error_for_status())
                    .and_then(|response| response.text())
                    .map_err(|e| {
                        mlua::Error::RuntimeError(format!("Cannot read lookup CSV at url: {e}."))
                    })?;

                let mut temp_file = tempfile::NamedTempFile::new()?;
                temp_file.write_all(lookup_csv_contents.as_bytes())?;

                // close the file handle but keep the file on disk
                // so that we can pass its path to the CSV reader
                let temp_path = temp_file.into_temp_path();
                lookup_table_uri_string = temp_path.to_str().unwrap_or_default().to_string();

                Some(temp_path)
            } else {
                None
            };

            let lookup_table = luau.create_table()?;
            #[allow(unused_assignments)]
            let mut record = csv::StringRecord::new();

            let conf = Config::new(&Some(lookup_table_uri_string))
                .delimiter(delimiter)
                .no_headers(false);

            let mut rdr = conf.reader()?;

            let headers = match rdr.headers() {
                Ok(headers) => headers.clone(),
                Err(e) => {
                    return Err(mlua::Error::RuntimeError(format!(
                        "qsv_register_lookup() cannot read headers of lookup table: {e}"
                    )));
                }
            };
            for result in rdr.records() {
                // NOTE: a record that cannot be read is treated as an empty record
                record = result.unwrap_or_default();
                let key = record.get(0).unwrap_or_default().trim();
                let inside_table = luau.create_table()?;
                for (i, header) in headers.iter().enumerate() {
                    // the first column is the key, the remaining columns are the values
                    if i > 0 {
                        let val = record.get(i).unwrap_or_default().trim();
                        inside_table.raw_set(header, val)?;
                    }
                }
                lookup_table.raw_set(key, inside_table)?;
            }

            // we're done reading, so release the reader before removing the file
            drop(rdr);

            // if we downloaded the CSV to a temp file, we need to delete it
            if let Some(temp_path) = downloaded_csv {
                temp_path.close()?;
            }

            luau.globals().raw_set(lookup_name_str, lookup_table)?;

            // now that we've successfully loaded the lookup table, we return the headers
            // as a table so the user can use them to access the values
            let headers_table = luau.create_table()?;
            for (i, header) in headers.iter().enumerate() {
                // we do not include the first column, which is the key
                if i > 0 {
                    headers_table.raw_set(i, header)?;
                }
            }

            Ok(headers_table)
        })?;
        luau.globals()
            .set("qsv_register_lookup", qsv_register_lookup)?;

        Ok(())
    }
  • validate: added --timeout option when downloading JSON Schemas 605b38b

Changed

  • remove all glob imports #857 and #858
  • qsvdp (Datapusher+-optimized qsv binary variant) now has an embedded luau interpreter #859
  • validate: JSON Schema url now case-insensitive 3123dc6
  • Bump serde from 1.0.155 to 1.0.156 by @dependabot in #862
  • applied select clippy lint recommendations
  • cargo update bump several indirect dependencies
  • pin Rust nightly to 2023-03-14

Don't miss a new qsv release

NewReleases is sending notifications on new releases.