Skip to content

Commit

Permalink
Point at invalid utf-8 span on user's source code
Browse files Browse the repository at this point in the history
```
error: couldn't read `$DIR/not-utf8-bin-file.rs`: stream did not contain valid UTF-8
  --> $DIR/not-utf8-2.rs:6:5
   |
LL |     include!("not-utf8-bin-file.rs");
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   |
note: `[193]` is not valid utf-8
  --> $DIR/not-utf8-bin-file.rs:2:14
   |
LL |     let _ = "�|�␂!5�cc␕␂��";
   |              ^
   = note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)
```

When we attempt to load a Rust source code file, if there is an OS file failure we try reading the file as bytes. If that succeeds we try to turn it into UTF-8. If *that* fails, we provide additional context about *where* the file has the first invalid UTF-8 byte sequence.

Fix #76869.
  • Loading branch information
estebank committed Jan 15, 2025
1 parent 27f3361 commit 472d719
Show file tree
Hide file tree
Showing 17 changed files with 111 additions and 14 deletions.
1 change: 1 addition & 0 deletions compiler/rustc_builtin_macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#![feature(proc_macro_internals)]
#![feature(proc_macro_quote)]
#![feature(rustdoc_internals)]
#![feature(string_from_utf8_lossy_owned)]
#![feature(try_blocks)]
#![warn(unreachable_pub)]
// tidy-alphabetical-end
Expand Down
32 changes: 29 additions & 3 deletions compiler/rustc_builtin_macros/src/source_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,34 @@ pub(crate) fn expand_include_str(
MacEager::expr(cx.expr_str(cx.with_def_site_ctxt(bsp), interned_src))
}
Err(_) => {
let guar = cx.dcx().span_err(sp, format!("`{path}` wasn't a utf-8 file"));
DummyResult::any(sp, guar)
let mut err = cx.dcx().struct_span_err(sp, format!("`{path}` wasn't a utf-8 file"));
let path = PathBuf::from(path.as_str());
if let Ok(contents) = std::fs::read(&path)
&& let Err(utf8err) = String::from_utf8(contents)
{
let start = utf8err.utf8_error().valid_up_to();
let note = format!("invalid utf-8 at byte `{start}`");
let msg = if let Some(len) = utf8err.utf8_error().error_len() {
format!(
"`{:?}` is not valid utf-8",
&utf8err.as_bytes()[start..start + len]
)
} else {
note.clone()
};
let contents = utf8err.into_utf8_lossy();
let source = cx.source_map().new_source_file(path.into(), contents);
let span = Span::with_root_ctxt(
source.normalized_byte_pos(start as u32),
source.normalized_byte_pos(start as u32),
);
if span.is_dummy() {
err.note(note);
} else {
err.span_note(span, msg);
}
}
DummyResult::any(sp, err.emit())
}
},
Err(dummy) => dummy,
Expand Down Expand Up @@ -273,7 +299,7 @@ fn load_binary_file(
.and_then(|path| path.into_os_string().into_string().ok());

if let Some(new_path) = new_path {
err.span_suggestion(
err.span_suggestion_verbose(
path_span,
"there is a file with the same name in a different directory",
format!("\"{}\"", new_path.replace('\\', "/").escape_debug()),
Expand Down
31 changes: 30 additions & 1 deletion compiler/rustc_parse/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#![feature(if_let_guard)]
#![feature(iter_intersperse)]
#![feature(let_chains)]
#![feature(string_from_utf8_lossy_owned)]
#![warn(unreachable_pub)]
// tidy-alphabetical-end

Expand Down Expand Up @@ -74,8 +75,36 @@ pub fn new_parser_from_file<'a>(
sp: Option<Span>,
) -> Result<Parser<'a>, Vec<Diag<'a>>> {
let source_file = psess.source_map().load_file(path).unwrap_or_else(|e| {
let msg = format!("couldn't read {}: {}", path.display(), e);
let msg = format!("couldn't read `{}`: {}", path.display(), e);
let mut err = psess.dcx().struct_fatal(msg);
if let Ok(contents) = std::fs::read(path)
&& let Err(utf8err) = String::from_utf8(contents)
{
// The file exists, but it wasn't valid UTF-8.
let start = utf8err.utf8_error().valid_up_to();
let note = format!("invalid utf-8 at byte `{start}`");
let msg = if let Some(len) = utf8err.utf8_error().error_len() {
format!("`{:?}` is not valid utf-8", &utf8err.as_bytes()[start..start + len])
} else {
note.clone()
};
let contents = utf8err.into_utf8_lossy();
let source = psess.source_map().new_source_file(path.to_owned().into(), contents);
let span = Span::with_root_ctxt(
source.normalized_byte_pos(start as u32),
source.normalized_byte_pos(start as u32),
);
if span.is_dummy() {
err.note(note);
} else {
if sp.is_some() {
err.span_note(span, msg);
} else {
err.span(span);
err.span_label(span, msg);
}
}
}
if let Some(sp) = sp {
err.span(sp);
}
Expand Down
2 changes: 2 additions & 0 deletions src/tools/compiletest/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ pub fn load_errors(testfile: &Path, revision: Option<&str>) -> Vec<Error> {

rdr.lines()
.enumerate()
// We want to ignore utf-8 failures in tests during collection of annotations.
.filter(|(_, line)| line.is_ok())
.filter_map(|(line_num, line)| {
parse_expected(last_nonfollow_error, line_num + 1, &line.unwrap(), revision).map(
|(which, error)| {
Expand Down
7 changes: 7 additions & 0 deletions tests/ui/macros/not-utf8-2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//@ error-pattern: did not contain valid UTF-8
//@ reference: input.encoding.utf8
//@ reference: input.encoding.invalid

fn foo() {
include!("not-utf8-bin-file.rs");
}
15 changes: 15 additions & 0 deletions tests/ui/macros/not-utf8-2.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
error: couldn't read `$DIR/not-utf8-bin-file.rs`: stream did not contain valid UTF-8
--> $DIR/not-utf8-2.rs:6:5
|
LL | include!("not-utf8-bin-file.rs");
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
note: `[193]` is not valid utf-8
--> $DIR/not-utf8-bin-file.rs:2:14
|
LL | let _ = "�|�␂!5�cc␕␂��";
| ^
= note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)

error: aborting due to 1 previous error

4 changes: 4 additions & 0 deletions tests/ui/macros/not-utf8-bin-file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
fn main() {
let _ = "Á|Õ!5¢ccŒÓ";
//~^ ERROR stream did not contain valid UTF-8
}
8 changes: 8 additions & 0 deletions tests/ui/macros/not-utf8-bin-file.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
error: couldn't read `$DIR/not-utf8-bin-file.rs`: stream did not contain valid UTF-8
--> $DIR/not-utf8-bin-file.rs:2:14
|
LL | let _ = "�|�␂!5�cc␕␂��";
| ^ `[193]` is not valid utf-8

error: aborting due to 1 previous error

2 changes: 1 addition & 1 deletion tests/ui/macros/not-utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
//@ reference: input.encoding.invalid

fn foo() {
include!("not-utf8.bin")
include!("not-utf8.bin");
}
9 changes: 7 additions & 2 deletions tests/ui/macros/not-utf8.stderr
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
error: couldn't read $DIR/not-utf8.bin: stream did not contain valid UTF-8
error: couldn't read `$DIR/not-utf8.bin`: stream did not contain valid UTF-8
--> $DIR/not-utf8.rs:6:5
|
LL | include!("not-utf8.bin")
LL | include!("not-utf8.bin");
| ^^^^^^^^^^^^^^^^^^^^^^^^
|
note: `[193]` is not valid utf-8
--> $DIR/not-utf8.bin:1:1
|
LL | �|�␂!5�cc␕␂�Ӻi��WWj�ȥ�'�}�␒�J�ȉ��W�␞O�@����␜w�V���LO����␔[ ␃_�'���SQ�~ذ��ų&��- ��lN~��!@␌ _#���kQ��h�␝�:�...
| ^
= note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)

error: aborting due to 1 previous error
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/modules/path-no-file-name.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//@ normalize-stderr: "\.:.*\(" -> ".: $$ACCESS_DENIED_MSG ("
//@ normalize-stderr: "\.`:.*\(" -> ".`: $$ACCESS_DENIED_MSG ("
//@ normalize-stderr: "os error \d+" -> "os error $$ACCESS_DENIED_CODE"

#[path = "."]
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/modules/path-no-file-name.stderr
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
error: couldn't read $DIR/.: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
error: couldn't read `$DIR/.`: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
--> $DIR/path-no-file-name.rs:5:1
|
LL | mod m;
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/parser/issues/issue-5806.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//@ normalize-stderr: "parser:.*\(" -> "parser: $$ACCESS_DENIED_MSG ("
//@ normalize-stderr: "parser`:.*\(" -> "parser`: $$ACCESS_DENIED_MSG ("
//@ normalize-stderr: "os error \d+" -> "os error $$ACCESS_DENIED_CODE"

#[path = "../parser"]
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/parser/issues/issue-5806.stderr
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
error: couldn't read $DIR/../parser: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
error: couldn't read `$DIR/../parser`: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
--> $DIR/issue-5806.rs:5:1
|
LL | mod foo;
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/parser/mod_file_with_path_attr.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//@ normalize-stderr: "not_a_real_file.rs:.*\(" -> "not_a_real_file.rs: $$FILE_NOT_FOUND_MSG ("
//@ normalize-stderr: "not_a_real_file.rs`:.*\(" -> "not_a_real_file.rs`: $$FILE_NOT_FOUND_MSG ("

#[path = "not_a_real_file.rs"]
mod m; //~ ERROR not_a_real_file.rs
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/parser/mod_file_with_path_attr.stderr
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
error: couldn't read $DIR/not_a_real_file.rs: $FILE_NOT_FOUND_MSG (os error 2)
error: couldn't read `$DIR/not_a_real_file.rs`: $FILE_NOT_FOUND_MSG (os error 2)
--> $DIR/mod_file_with_path_attr.rs:4:1
|
LL | mod m;
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/unpretty/staged-api-invalid-path-108697.stderr
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
error: couldn't read $DIR/lol: No such file or directory (os error 2)
error: couldn't read `$DIR/lol`: No such file or directory (os error 2)
--> $DIR/staged-api-invalid-path-108697.rs:8:1
|
LL | mod foo;
Expand Down

0 comments on commit 472d719

Please sign in to comment.