// Copyright 2018-2020 Parity Technologies (UK) Ltd. // This file is part of cargo-contract. // // cargo-contract is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // cargo-contract is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with cargo-contract. If not, see . use super::{ Hex, Map, Tuple, Value, }; use escape8259::unescape; use nom::{ branch::alt, bytes::complete::{ tag, take_while1, }, character::complete::{ alphanumeric1, anychar, char, digit1, hex_digit1, multispace0, }, multi::{ many0, separated_list0, }, sequence::{ delimited, pair, separated_pair, tuple, }, AsChar, IResult, Parser, }; use nom_supreme::{ error::ErrorTree, ParserExt, }; use std::str::FromStr as _; /// Attempt to parse a SCON value pub fn parse_value(input: &str) -> anyhow::Result { let (_, value) = scon_value(input) .map_err(|err| anyhow::anyhow!("Error parsing Value: {}", err))?; Ok(value) } fn scon_value(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { ws(alt(( scon_unit, scon_hex, scon_seq, scon_tuple, scon_map, scon_string, scon_literal, scon_integer, scon_bool, scon_char, scon_unit_tuple, ))) .context("Value") .parse(input) } fn scon_string(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { #[derive(Debug)] struct UnescapeError(String); impl std::error::Error for UnescapeError {} impl std::fmt::Display for UnescapeError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "Error unescaping string '{}'", self.0) } } // One or more unescaped text characters let nonescaped_string = take_while1(|c| { let cv = c as u32; // A character that is: // NOT a control character (0x00 - 0x1F) // NOT a quote character (0x22) // NOT a backslash character (0x5C) // Is within the unicode range (< 0x10FFFF) (this is already guaranteed by Rust char) (cv >= 0x20) && (cv != 0x22) && (cv != 0x5C) }); // There are only two types of escape allowed by RFC 8259. // - single-character escapes \" \\ \/ \b \f \n \r \t // - general-purpose \uXXXX // Note: we don't enforce that escape codes are valid here. // There must be a decoder later on. let escape_code = pair( tag("\\"), alt(( tag("\""), tag("\\"), tag("/"), tag("b"), tag("f"), tag("n"), tag("r"), tag("t"), tag("u"), )), ) .recognize(); many0(alt((nonescaped_string, escape_code))) .recognize() .delimited_by(tag("\"")) .map_res::<_, _, UnescapeError>(|s: &str| { let unescaped = unescape(s).map_err(|_| UnescapeError(s.to_string()))?; Ok(Value::String(unescaped)) }) .parse(input) } fn rust_ident(input: &str) -> IResult<&str, &str, ErrorTree<&str>> { let alpha_or_underscore = anychar.verify(|c: &char| c.is_alpha() || *c == '_'); take_while1(|c: char| c.is_alphanumeric() || c == '_') .preceded_by(alpha_or_underscore.peek()) .parse(input) } /// Parse a signed or unsigned integer literal, supports optional Rust style underscore separators. fn scon_integer(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { let sign = alt((char('+'), char('-'))); pair(sign.opt(), separated_list0(char('_'), digit1)) .map_res(|(sign, parts)| { let digits = parts.join(""); if let Some(sign) = sign { let s = format!("{}{}", sign, digits); s.parse::().map(Value::Int) } else { digits.parse::().map(Value::UInt) } }) .parse(input) } fn scon_unit(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { let (i, _) = tag("()").parse(input)?; Ok((i, Value::Unit)) } fn scon_bool(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { alt(( tag("false").value(Value::Bool(false)), tag("true").value(Value::Bool(true)), )) .parse(input) } fn scon_char(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { anychar .delimited_by(char('\'')) .map(Value::Char) .parse(input) } fn scon_seq(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { separated_list0(ws(char(',')), scon_value) .preceded_by(ws(char('['))) .terminated(pair(ws(char(',')).opt(), ws(char(']')))) .map(|seq| Value::Seq(seq.into())) .parse(input) } fn scon_tuple(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { let tuple_body = separated_list0(ws(char(',')), scon_value) .preceded_by(ws(char('('))) .terminated(pair(ws(char(',')).opt(), ws(char(')')))); tuple((ws(rust_ident).opt(), tuple_body)) .map(|(ident, v)| Value::Tuple(Tuple::new(ident, v.into_iter().collect()))) .parse(input) } /// Parse a rust ident on its own which could represent a struct with no fields or a enum unit /// variant e.g. "None" fn scon_unit_tuple(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { rust_ident .map(|ident| Value::Tuple(Tuple::new(Some(ident), Vec::new()))) .parse(input) } fn scon_map(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { let opening = alt((tag("("), tag("{"))); let closing = alt((tag(")"), tag("}"))); let ident_key = rust_ident.map(|s| Value::String(s.into())); let scon_map_key = ws(alt((ident_key, scon_string, scon_integer))); let map_body = separated_list0( ws(char(',')), separated_pair(scon_map_key, ws(char(':')), scon_value), ) .preceded_by(ws(opening)) .terminated(pair(ws(char(',')).opt(), ws(closing))); tuple((ws(rust_ident).opt(), map_body)) .map(|(ident, v)| Value::Map(Map::new(ident, v.into_iter().collect()))) .parse(input) } fn scon_hex(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { tag("0x") .precedes(hex_digit1) .map_res::<_, _, hex::FromHexError>(|byte_str| { let hex = Hex::from_str(byte_str)?; Ok(Value::Hex(hex)) }) .parse(input) } /// Parse any alphanumeric literal with more than 39 characters (the length of `u128::MAX`) /// /// This is suitable for capturing e.g. Base58 encoded literals for Substrate addresses fn scon_literal(input: &str) -> IResult<&str, Value, ErrorTree<&str>> { const MAX_UINT_LEN: usize = 39; alphanumeric1 .verify(|s: &&str| s.len() > MAX_UINT_LEN) .recognize() .map(|literal: &str| Value::Literal(literal.to_string())) .parse(input) } fn ws(f: F) -> impl FnMut(I) -> IResult where F: FnMut(I) -> IResult, I: nom::InputTakeAtPosition, ::Item: nom::AsChar + Clone, E: nom::error::ParseError, { delimited(multispace0, f, multispace0) } #[cfg(test)] mod tests { use super::*; use assert_matches::assert_matches; fn assert_scon_value(input: &str, expected: Value) { assert_eq!(scon_value(input).unwrap(), ("", expected)); } #[test] fn test_parse_value() { assert_eq!(parse_value("true").unwrap(), Value::Bool(true)) } #[test] fn test_unit() { assert_eq!(scon_value("()").unwrap(), ("", Value::Unit)); } #[test] fn test_bool() { assert_eq!(scon_bool("false").unwrap(), ("", Value::Bool(false))); assert_eq!(scon_bool("true").unwrap(), ("", Value::Bool(true))); assert!(scon_bool("foo").is_err()); } #[test] fn test_integer() { assert_eq!(scon_integer("42").unwrap(), ("", Value::UInt(42))); assert_eq!(scon_integer("-123").unwrap(), ("", Value::Int(-123))); assert_eq!(scon_integer("+456").unwrap(), ("", Value::Int(456))); assert_eq!(scon_integer("0").unwrap(), ("", Value::UInt(0))); assert_eq!(scon_integer("01").unwrap(), ("", Value::UInt(1))); assert_eq!( scon_integer("340282366920938463463374607431768211455").unwrap(), ("", Value::UInt(340282366920938463463374607431768211455)) ); // underscore separators assert_eq!( scon_integer("1_000_000").unwrap(), ("", Value::UInt(1_000_000)) ); assert_eq!( scon_integer("-2_000_000").unwrap(), ("", Value::Int(-2_000_000)) ); assert_eq!( scon_integer("+3_000_000").unwrap(), ("", Value::Int(3_000_000)) ); assert_eq!( scon_integer("340_282_366_920_938_463_463_374_607_431_768_211_455").unwrap(), ("", Value::UInt(340282366920938463463374607431768211455)) ); // too many digits assert_matches!( scon_integer("3402823669209384634633746074317682114550"), Err(nom::Err::Error(_)) ); assert_matches!(scon_integer("abc123"), Err(nom::Err::Error(_))); } #[test] fn test_string() { // Plain Unicode strings with no escaping assert_eq!( scon_string(r#""""#).unwrap(), ("", Value::String("".into())) ); assert_eq!( scon_string(r#""Hello""#).unwrap(), ("", Value::String("Hello".into())) ); assert_eq!( scon_string(r#""の""#).unwrap(), ("", Value::String("の".into())) ); assert_eq!( scon_string(r#""𝄞""#).unwrap(), ("", Value::String("𝄞".into())) ); // valid 2-character escapes assert_eq!( scon_string(r#"" \\ ""#).unwrap(), ("", Value::String(" \\ ".into())) ); assert_eq!( scon_string(r#"" \" ""#).unwrap(), ("", Value::String(" \" ".into())) ); // valid 6-character escapes assert_eq!( scon_string(r#""\u0000""#).unwrap(), ("", Value::String("\x00".into())) ); assert_eq!( scon_string(r#""\u00DF""#).unwrap(), ("", Value::String("ß".into())) ); assert_eq!( scon_string(r#""\uD834\uDD1E""#).unwrap(), ("", Value::String("𝄞".into())) ); // Invalid because surrogate characters must come in pairs assert!(scon_string(r#""\ud800""#).is_err()); // Unknown 2-character escape assert!(scon_string(r#""\x""#).is_err()); // Not enough hex digits assert!(scon_string(r#""\u""#).is_err()); assert!(scon_string(r#""\u001""#).is_err()); // Naked control character assert!(scon_string(r#""\x0a""#).is_err()); // Not a JSON string because it's not wrapped in quotes assert!(scon_string("abc").is_err()); // An unterminated string (because the trailing quote is escaped) assert!(scon_string(r#""\""#).is_err()); // Parses correctly but has escape errors due to incomplete surrogate pair. assert_matches!(scon_string(r#""\ud800""#), Err(nom::Err::Error(_))); } #[test] fn test_seq() { assert_eq!(scon_value("[ ]").unwrap(), ("", Value::Seq(vec![].into()))); assert_eq!( scon_value("[ 1 ]").unwrap(), ("", Value::Seq(vec![Value::UInt(1)].into())) ); let expected = Value::Seq(vec![Value::UInt(1), Value::String("x".into())].into()); assert_eq!(scon_value(r#" [ 1 , "x" ] "#).unwrap(), ("", expected)); let trailing = r#"["a", "b",]"#; assert_eq!( scon_value(trailing).unwrap(), ( "", Value::Seq( vec![Value::String("a".into()), Value::String("b".into())].into() ) ) ); } #[test] fn test_rust_ident() { assert_eq!(rust_ident("a").unwrap(), ("", "a")); assert_eq!(rust_ident("a:").unwrap(), (":", "a")); assert_eq!(rust_ident("Ok").unwrap(), ("", "Ok")); assert_eq!(rust_ident("_ok").unwrap(), ("", "_ok")); assert_eq!(rust_ident("im_ok").unwrap(), ("", "im_ok")); assert_eq!(rust_ident("im_ok_").unwrap(), ("", "im_ok_")); assert_eq!(rust_ident("im_ok_123abc").unwrap(), ("", "im_ok_123abc")); assert!(rust_ident("1notok").is_err()); } #[test] fn test_literal() { assert_eq!( scon_literal("5GrwvaEF5zXb26Fz9rcQpDWS57CtERHpNehXCPcNoHGKutQY").unwrap(), ( "", Value::Literal("5GrwvaEF5zXb26Fz9rcQpDWS57CtERHpNehXCPcNoHGKutQY".into()) ) ); assert_eq!( scon_literal("5GrwvaEF5zXb26Fz9rcQpDWS57CtERHpNehXCPcNoHGKutQY").unwrap(), ( "", Value::Literal("5GrwvaEF5zXb26Fz9rcQpDWS57CtERHpNehXCPcNoHGKutQY".into()) ) ); assert_scon_value( "5GrwvaEF5zXb26Fz9rcQpDWS57CtERHpNehXCPcNoHGKutQY", Value::Literal("5GrwvaEF5zXb26Fz9rcQpDWS57CtERHpNehXCPcNoHGKutQY".into()), ); } #[test] fn test_map() { assert_eq!( scon_value("Foo {}").unwrap(), ("", Value::Map(Map::new(Some("Foo"), Default::default()))) ); assert_eq!( scon_value("Foo{}").unwrap(), ("", Value::Map(Map::new(Some("Foo"), Default::default()))) ); assert_eq!(rust_ident("a:").unwrap(), (":", "a")); assert_eq!( scon_value(r#"(a: 1)"#).unwrap(), ( "", Value::Map(Map::new( None, vec![(Value::String("a".into()), Value::UInt(1)),] .into_iter() .collect() )) ) ); assert_eq!( scon_value(r#"A (a: 1, b: "bar")"#).unwrap(), ( "", Value::Map(Map::new( Some("A"), vec![ (Value::String("a".into()), Value::UInt(1)), (Value::String("b".into()), Value::String("bar".into())), ] .into_iter() .collect() )) ) ); assert_eq!( scon_value(r#"B(a: 1)"#).unwrap(), ( "", Value::Map(Map::new( Some("B"), vec![(Value::String("a".into()), Value::UInt(1)),] .into_iter() .collect() )) ) ); assert_eq!( scon_value(r#"Struct { a : 1 }"#).unwrap(), ( "", Value::Map(Map::new( Some("Struct"), vec![(Value::String("a".into()), Value::UInt(1)),] .into_iter() .collect() )) ) ); let map = r#"Mixed { 1: "a", "b": 2, c: true, }"#; assert_eq!( scon_value(map).unwrap(), ( "", Value::Map(Map::new( Some("Mixed"), vec![ (Value::UInt(1), Value::String("a".into())), (Value::String("b".into()), Value::UInt(2)), (Value::String("c".into()), Value::Bool(true)), // (Value::String("d".into()), Value::Literal("5ALiteral".into())), ] .into_iter() .collect() )) ) ); } #[test] fn test_tuple() { assert_scon_value("Foo ()", Value::Tuple(Tuple::new(Some("Foo"), vec![]))); assert_scon_value("Foo()", Value::Tuple(Tuple::new(Some("Foo"), vec![]))); assert_scon_value("Foo", Value::Tuple(Tuple::new(Some("Foo"), vec![]))); assert_scon_value( r#"B("a")"#, Value::Tuple(Tuple::new(Some("B"), vec![Value::String("a".into())])), ); assert_scon_value( r#"B("a", 10, true)"#, Value::Tuple(Tuple::new( Some("B"), vec![ Value::String("a".into()), Value::UInt(10), Value::Bool(true), ], )), ); assert_scon_value( r#"Mixed ("a", 10, ["a", "b", "c"],)"#, Value::Tuple(Tuple::new( Some("Mixed"), vec![ Value::String("a".into()), Value::UInt(10), Value::Seq( vec![ Value::String("a".into()), Value::String("b".into()), Value::String("c".into()), ] .into(), ), ], )), ); assert_scon_value( r#"(Nested("a", 10))"#, Value::Tuple(Tuple::new( None, vec![Value::Tuple(Tuple::new( Some("Nested"), vec![Value::String("a".into()), Value::UInt(10)], ))], )), ) } #[test] fn test_option() { assert_scon_value( r#"Some("a")"#, Value::Tuple(Tuple::new(Some("Some"), vec![Value::String("a".into())])), ); assert_scon_value( r#"None"#, Value::Tuple(Tuple::new(Some("None"), Vec::new())), ); } #[test] fn test_char() { assert_scon_value(r#"'c'"#, Value::Char('c')); } #[test] fn test_bytes() { assert_scon_value("0x0000", Value::Hex(Hex::from_str("0x0000").unwrap())); assert_scon_value( "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", Value::Hex( Hex::from_str("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF") .unwrap(), ), ); } }