Skip to content

Commit

Permalink
bugfix for Incorrect encoding and Update the content on encoding in t…
Browse files Browse the repository at this point in the history
…he document (#548)
  • Loading branch information
fengzyf authored Jun 15, 2024
1 parent 339d396 commit c357c9a
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 62 deletions.
32 changes: 2 additions & 30 deletions LatexIndent/GetYamlSettings.pm
Original file line number Diff line number Diff line change
Expand Up @@ -389,36 +389,8 @@ sub yaml_read_settings {
# output the contents of indentconfig to the log file
$logger->info( Dump \%{ $userSettings->[0] } );

# change the encoding of the paths according to the field `encoding`
if ( $userSettings and ( ref( $userSettings->[0] ) eq 'HASH' ) and $userSettings->[0]->{encoding} ) {
use Encode;
my $encoding = $userSettings->[0]->{encoding};
my $encodingObject = find_encoding($encoding);

# Check if the encoding is valid.
if ( ref($encodingObject) ) {
$logger->info("*Encoding of the paths is $encoding");
foreach ( @{ $userSettings->[0]->{paths} } ) {
my $temp = $encodingObject->encode("$_");
$logger->info("Transform file encoding: $_ -> $temp");
push( @absPaths, $temp );
}
}
else {
$logger->warn("*encoding \"$encoding\" not found");
$logger->warn("Ignore this setting and will take the default encoding.");
@absPaths = @{ $userSettings->[0]->{paths} };
foreach ( @{ $userSettings->[0]->{paths} } ) {
push( @absPaths, $_ );
}
}
}
else # No such setting, and will take the default
{
# $logger->info("*Encoding of the paths takes the default.");
foreach ( @{ $userSettings->[0]->{paths} } ) {
push( @absPaths, $_ );
}
foreach ( @{ $userSettings->[0]->{paths} } ) {
push( @absPaths, $_ );
}
}

Expand Down
40 changes: 30 additions & 10 deletions LatexIndent/UTF8CmdLineArgsFileOperation.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,61 @@ use warnings;
use feature qw( say state );
use utf8;
use Config qw( %Config );
use Encode qw( decode encode );
use Encode qw(find_encoding decode encode );

use Exporter qw/import/;
our @EXPORT_OK
= qw/commandlineargs_with_encode @new_args copy_with_encode exist_with_encode open_with_encode zero_with_encode read_yaml_with_encode isdir_with_encode mkdir_with_encode/;

# Encode object that the *_with_encode helpers use to turn file names into
# bytes before passing them to the operating system.
our $encodingObject;

# On Windows, start from the encoding named after the active ANSI code page
# (for example cp936).
$encodingObject = find_encoding( 'cp' . Win32::GetACP() ) if $^O eq 'MSWin32';

# On every other platform, or when the code page does not map to an encoding
# known to Encode (find_encoding did not return an object), use UTF-8.
$encodingObject = find_encoding( 'utf-8' ) unless ref($encodingObject);

sub copy_with_encode {
use File::Copy;
my ( $source, $destination ) = @_;

if ( $FindBin::Script eq 'latexindent.exe' ) {
if ( $FindBin::Script =~ /\.exe$/ ) {
require Win32::Unicode::File;
Win32::Unicode::File->import(qw(copyW));
copyW( $source, $destination, 1 );
}
else {
$source = $encodingObject->encode($source);
$destination = $encodingObject->encode($destination);
copy( $source, $destination );
}
}

sub exist_with_encode {
my ($filename) = @_;

if ( $FindBin::Script eq 'latexindent.exe' ) {
if ( $FindBin::Script =~ /\.exe$/ ) {
require Win32::Unicode::File;
Win32::Unicode::File->import(qw(statW));
return statW($filename);
}
else {
$filename = $encodingObject->encode($filename);
return -e $filename;
}
}

sub zero_with_encode {
my ($filename) = @_;

if ( $FindBin::Script eq 'latexindent.exe' ) {
if ( $FindBin::Script =~ /\.exe$/ ) {
require Win32::Unicode::File;
Win32::Unicode::File->import(qw(file_size));
my $size = file_size($filename);
Expand All @@ -53,6 +71,7 @@ sub zero_with_encode {
}
}
else {
$filename = $encodingObject->encode($filename);
return -z $filename;
}
}
Expand All @@ -62,7 +81,7 @@ sub open_with_encode {
my $filename = shift;
my $fh;

if ( $FindBin::Script eq 'latexindent.exe' ) {
if ( $FindBin::Script =~ /\.exe$/ ) {
require Win32::Unicode::File;
Win32::Unicode::File->import;
$fh = Win32::Unicode::File->new;
Expand All @@ -74,6 +93,7 @@ sub open_with_encode {
}
}
else {
$filename = $encodingObject->encode($filename);
if ( open( $fh, $mode, $filename ) ) {
return $fh;
}
Expand All @@ -95,21 +115,22 @@ sub read_yaml_with_encode {
sub isdir_with_encode {
my $path = shift;

if ( $FindBin::Script eq 'latexindent.exe' ) {
if ( $FindBin::Script =~ /\.exe$/ ) {
require Win32::Unicode::File;
Win32::Unicode::File->import(qw(file_type));

return file_type( 'd', $path );
}
else {
$path = $encodingObject->encode($path);
return -d $path;
}
}

sub mkdir_with_encode {
my $path = shift;

if ( $FindBin::Script eq 'latexindent.exe' ) {
if ( $FindBin::Script =~ /\.exe$/ ) {
require Win32::Unicode::Dir;
Win32::Unicode::Dir->import(qw(mkdirW));

Expand All @@ -118,15 +139,15 @@ sub mkdir_with_encode {
else {
require File::Path;
File::Path->import(qw(make_path));

$path = $encodingObject->encode($path);
make_path($path);
}
}

#https://stackoverflow.com/a/63868721
#https://stackoverflow.com/a/44489228
sub commandlineargs_with_encode {
if ( $FindBin::Script eq 'latexindent.exe' ) {
if ( $FindBin::Script =~ /\.exe$/ ) {
require Win32::API;
import Win32::API qw( ReadMemory );

Expand Down Expand Up @@ -207,7 +228,6 @@ sub commandlineargs_with_encode {
@ARGV = @{$args};
}
else {
my $encodingObject = "utf-8";
@ARGV = map { decode( $encodingObject, $_ ) } @ARGV;
our @new_args = @ARGV;
}
Expand Down
29 changes: 17 additions & 12 deletions documentation/sec-appendices.tex
Original file line number Diff line number Diff line change
Expand Up @@ -922,21 +922,26 @@
and \lstinline!-----! respectively.
\end{example}

\section{Encoding indentconfig.yaml}\label{app:encoding}
In relation to \vref{sec:indentconfig}, Windows users that encounter encoding issues
with \texttt{indentconfig.yaml}, may wish to run the following command in either
\texttt{cmd.exe} or \texttt{powershell.exe}:
\section{Encoding}\label{app:encoding}

When using \texttt{latexindent.pl} in different ways on different systems, the range of characters supported by its switches/flags/options (see \vref{sec:commandline}) may vary.

For the Windows executable file \texttt{latexindent.exe}, its options support UTF-8 characters.

For the Windows Perl script \texttt{latexindent.pl}, the options support the characters of the encoding that corresponds to the system code page. You can check the system code page by running the following command in either \texttt{cmd.exe} or \texttt{powershell.exe}:
\begin{dosprompt}
chcp
\end{dosprompt}
They may receive the following result
chcp
\end{dosprompt}
which may give the following result
\begin{dosprompt}
Active code page: 936
\end{dosprompt}
and can then use the settings given in \cref{lst:indentconfig-encoding1} within their
\texttt{indentconfig.yaml}, where 936 is the result of the \texttt{chcp} command.
Active code page: 936
\end{dosprompt}
and then the characters supported by the code page can be found in \href{https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers}{Microsoft's code page identifier table}.
For example, code page 936 is listed there as: ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312).

For Ubuntu Linux and macOS users, whether using the Perl script or the executable file, the options support UTF-8 characters.


\cmhlistingsfromfile[style=yaml-LST]{demonstrations/encoding1.yaml}[yaml-TCB]{\texttt{encoding} demonstration for \texttt{indentconfig.yaml}}{lst:indentconfig-encoding1}

\section{dos2unix linebreak adjustment}

Expand Down
3 changes: 3 additions & 0 deletions documentation/sec-how-to-use.tex
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ \subsection{From the command line}\label{sec:commandline}
written to \texttt{indent.log}, but other additional information will be written
depending on which of the following options are used.

When using \texttt{latexindent.pl} in different ways on different systems, the range of characters supported by its switches/flags/options may vary.
We discuss these in \cref{app:encoding}.

\flagbox{-v, --version}
\index{switches!-v, --version definition and details}
\announce{2017-06-25}{version}
Expand Down
10 changes: 0 additions & 10 deletions documentation/sec-indent-config-and-settings.tex
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,6 @@ \subsection{indentconfig.yaml and .indentconfig.yaml}\label{subsec:indentconfig}
\texttt{latexindent.yaml} and friends settings files. This can lead to creative nesting
of configuration files; a demonstration is given in \vref{sec:appendix:paths}.

If you find that \announce{2021-06-19}{encoding option for indentconfig.yaml}
\texttt{latexindent.pl} does not read your YAML file, then it might be as a result of
the default commandline encoding not being UTF-8; normally this will only occur for
Windows users. In this case, you might like to explore the \texttt{encoding} option for
\texttt{indentconfig.yaml} as demonstrated in \cref{lst:indentconfig-encoding}.%

\cmhlistingsfromfile[style=yaml-LST]{demonstrations/encoding.yaml}[yaml-TCB]{The \texttt{encoding} option for \texttt{indentconfig.yaml}}{lst:indentconfig-encoding}

Thank you to \cite{qiancy98} for this contribution; please see \vref{app:encoding} and
details within \cite{encoding} for further information.

\subsection{localSettings.yaml and friends}\label{sec:localsettings}
The \texttt{-l} switch tells \texttt{latexindent.pl} to look for
Expand Down
2 changes: 2 additions & 0 deletions documentation/sec-introduction.tex
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ \subsection{License}
throughout this document for details}.

\subsection{Quick start}\label{sec:quickstart}
When \texttt{latexindent.pl} reads and writes files, it uses UTF-8 encoding by default. That is to say, your tex and yaml files need to be encoded in UTF-8.

If you'd like to get started with \texttt{latexindent.pl} then simply type

\begin{commandshell}
Expand Down

0 comments on commit c357c9a

Please sign in to comment.