Skip to content
Snippets Groups Projects
Commit 8c2ea190 authored by Koute's avatar Koute Committed by GitHub
Browse files

Strip out control codes from the logged messages (#10081)

* Strip out control codes from the logged messages

* Also strip away C1 control codes

* Add extra comments

* Clear the buffer after flushing; rename `write` to `flush`

* Move control code stripping into its own function

* Also strip out control codes from panic messages

* Also strip out Unicode left-to-right/right-to-left control codes
parent 21c8d18c
No related merge requests found
......@@ -9462,6 +9462,8 @@ name = "sp-panic-handler"
version = "3.0.0"
dependencies = [
"backtrace",
"lazy_static",
"regex",
]
[[package]]
......
......@@ -62,7 +62,7 @@ where
S: Subscriber + for<'a> LookupSpan<'a>,
N: for<'a> FormatFields<'a> + 'static,
{
let writer = &mut MaybeColorWriter::new(self.enable_color, writer);
let writer = &mut ControlCodeSanitizer::new(!self.enable_color, writer);
let normalized_meta = event.normalized_metadata();
let meta = normalized_meta.as_ref().unwrap_or_else(|| event.metadata());
time::write(&self.timer, writer, self.enable_color)?;
......@@ -100,10 +100,18 @@ where
}
}
// The writer only sanitizes its output once it's flushed, so if we don't actually need
// to sanitize everything we need to flush out what was already buffered as-is and only
// force-sanitize what follows.
if !writer.sanitize {
writer.flush()?;
writer.sanitize = true;
}
ctx.format_fields(writer, event)?;
writeln!(writer)?;
writer.write()
writer.flush()
}
}
......@@ -294,43 +302,60 @@ where
}
}
/// A writer that may write to `inner_writer` with colors.
/// A writer which (optionally) strips out terminal control codes from the logs.
///
/// This is used by [`EventFormat`] to kill colors when `enable_color` is `false`.
/// This is used by [`EventFormat`] to sanitize the log messages.
///
/// It is required to call [`MaybeColorWriter::write`] after all writes are done,
/// It is required to call [`ControlCodeSanitizer::flush`] after all writes are done,
/// because the content of these writes is buffered and will only be written to the
/// `inner_writer` at that point.
struct MaybeColorWriter<'a> {
enable_color: bool,
struct ControlCodeSanitizer<'a> {
sanitize: bool,
buffer: String,
inner_writer: &'a mut dyn fmt::Write,
}
impl<'a> fmt::Write for MaybeColorWriter<'a> {
impl<'a> fmt::Write for ControlCodeSanitizer<'a> {
fn write_str(&mut self, buf: &str) -> fmt::Result {
self.buffer.push_str(buf);
Ok(())
}
}
impl<'a> MaybeColorWriter<'a> {
// NOTE: When making any changes here make sure to also change this function in `sp-panic-handler`.
fn strip_control_codes(input: &str) -> std::borrow::Cow<str> {
lazy_static::lazy_static! {
static ref RE: Regex = Regex::new(r#"(?x)
\x1b\[[^m]+m| # VT100 escape codes
[
\x00-\x09\x0B-\x1F # ASCII control codes / Unicode C0 control codes, except \n
\x7F # ASCII delete
\u{80}-\u{9F} # Unicode C1 control codes
\u{202A}-\u{202E} # Unicode left-to-right / right-to-left control characters
\u{2066}-\u{2069} # Same as above
]
"#).expect("regex parsing doesn't fail; qed");
}
RE.replace_all(input, "")
}
impl<'a> ControlCodeSanitizer<'a> {
/// Creates a new instance.
fn new(enable_color: bool, inner_writer: &'a mut dyn fmt::Write) -> Self {
Self { enable_color, inner_writer, buffer: String::new() }
fn new(sanitize: bool, inner_writer: &'a mut dyn fmt::Write) -> Self {
Self { sanitize, inner_writer, buffer: String::new() }
}
/// Write the buffered content to the `inner_writer`.
fn write(&mut self) -> fmt::Result {
lazy_static::lazy_static! {
static ref RE: Regex = Regex::new("\x1b\\[[^m]+m").expect("Error initializing color regex");
}
if !self.enable_color {
let replaced = RE.replace_all(&self.buffer, "");
self.inner_writer.write_str(&replaced)
fn flush(&mut self) -> fmt::Result {
if self.sanitize {
let replaced = strip_control_codes(&self.buffer);
self.inner_writer.write_str(&replaced)?
} else {
self.inner_writer.write_str(&self.buffer)
self.inner_writer.write_str(&self.buffer)?
}
self.buffer.clear();
Ok(())
}
}
......@@ -595,4 +595,36 @@ mod tests {
assert!(stderr.contains(&line));
}
}
#[test]
fn control_characters_are_always_stripped_out_from_the_log_messages() {
const RAW_LINE: &str = "$$START$$\x1B[1;32mIn\u{202a}\u{202e}\u{2066}\u{2069}ner\n\r\x7ftext!\u{80}\u{9f}\x1B[0m$$END$$";
const SANITIZED_LINE: &str = "$$START$$Inner\ntext!$$END$$";
let output = run_test_in_another_process(
"control_characters_are_always_stripped_out_from_the_log_messages",
|| {
std::env::set_var("RUST_LOG", "trace");
let mut builder = LoggerBuilder::new("");
builder.with_colors(true);
builder.init().unwrap();
log::error!("{}", RAW_LINE);
},
);
if let Some(output) = output {
let stderr = String::from_utf8(output.stderr).unwrap();
// The log messages should always be sanitized.
assert!(!stderr.contains(RAW_LINE));
assert!(stderr.contains(SANITIZED_LINE));
// The part where the timestamp, the logging level, etc. is printed out doesn't
// always have to be sanitized unless it's necessary, and here it shouldn't be.
assert!(stderr.contains("\x1B[31mERROR\x1B[0m"));
// Make sure the logs aren't being duplicated.
assert_eq!(stderr.find("ERROR"), stderr.rfind("ERROR"));
assert_eq!(stderr.find(SANITIZED_LINE), stderr.rfind(SANITIZED_LINE));
}
}
}
......@@ -15,3 +15,5 @@ targets = ["x86_64-unknown-linux-gnu"]
[dependencies]
backtrace = "0.3.38"
regex = "1.5.4"
lazy_static = "1.4.0"
......@@ -25,6 +25,7 @@
//! temporarily be disabled by using an [`AbortGuard`].
use backtrace::Backtrace;
use regex::Regex;
use std::{
cell::Cell,
io::{self, Write},
......@@ -125,6 +126,24 @@ impl Drop for AbortGuard {
}
}
// NOTE: When making any changes here make sure to also change this function in `sc-tracing`.
fn strip_control_codes(input: &str) -> std::borrow::Cow<str> {
lazy_static::lazy_static! {
static ref RE: Regex = Regex::new(r#"(?x)
\x1b\[[^m]+m| # VT100 escape codes
[
\x00-\x09\x0B-\x1F # ASCII control codes / Unicode C0 control codes, except \n
\x7F # ASCII delete
\u{80}-\u{9F} # Unicode C1 control codes
\u{202A}-\u{202E} # Unicode left-to-right / right-to-left control characters
\u{2066}-\u{2069} # Same as above
]
"#).expect("regex parsing doesn't fail; qed");
}
RE.replace_all(input, "")
}
/// Function being called when a panic happens.
fn panic_hook(info: &PanicInfo, report_url: &str, version: &str) {
let location = info.location();
......@@ -139,6 +158,8 @@ fn panic_hook(info: &PanicInfo, report_url: &str, version: &str) {
},
};
let msg = strip_control_codes(&msg);
let thread = thread::current();
let name = thread.name().unwrap_or("<unnamed>");
......@@ -181,4 +202,44 @@ mod tests {
let _guard = AbortGuard::force_abort();
std::panic::catch_unwind(|| panic!()).ok();
}
fn run_test_in_another_process(
test_name: &str,
test_body: impl FnOnce(),
) -> Option<std::process::Output> {
if std::env::var("RUN_FORKED_TEST").is_ok() {
test_body();
None
} else {
let output = std::process::Command::new(std::env::current_exe().unwrap())
.arg(test_name)
.env("RUN_FORKED_TEST", "1")
.output()
.unwrap();
assert!(output.status.success());
Some(output)
}
}
#[test]
fn control_characters_are_always_stripped_out_from_the_panic_messages() {
const RAW_LINE: &str = "$$START$$\x1B[1;32mIn\u{202a}\u{202e}\u{2066}\u{2069}ner\n\r\x7ftext!\u{80}\u{9f}\x1B[0m$$END$$";
const SANITIZED_LINE: &str = "$$START$$Inner\ntext!$$END$$";
let output = run_test_in_another_process(
"control_characters_are_always_stripped_out_from_the_panic_messages",
|| {
set("test", "1.2.3");
let _guard = AbortGuard::force_unwind();
let _ = std::panic::catch_unwind(|| panic!("{}", RAW_LINE));
},
);
if let Some(output) = output {
let stderr = String::from_utf8(output.stderr).unwrap();
assert!(!stderr.contains(RAW_LINE));
assert!(stderr.contains(SANITIZED_LINE));
}
}
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment