Convert CSV records to TSV: Difference between revisions

m
→‎{{header|Wren}}: Changed to Wren S/H
m (handle \r)
m (→‎{{header|Wren}}: Changed to Wren S/H)
 
(8 intermediate revisions by 3 users not shown)
Line 14:
 
; A CSV record
[[Category:PEG]]
Our starting point will be a character set that includes ASCII; the language
of regular expressions (which will be denoted by strings of the form
r'REGEX'); and the following [[:Category:PEG|PEG]] (parsing expression grammar) for a
single CSV record:
<pre>
Line 113 ⟶ 114:
aRETURNb, Be sure to change RETURN to the '\r' control character (#xd)
a\b
</pre>
 
[[category:CSV]]
[[category:TSV]]
 
=={{header|ALGOL 68}}==
All input \ characters are doubled in the output.<br>
As with some of the other samples, the input data is stored in an array, not read from a file.
<syntaxhighlight lang="algol68">
BEGIN # convert lines of CSV to TSV #
# the control characters that are given special treatment below, #
# each built from its integer character code with REPR           #
CHAR nul = REPR 0;
CHAR tab = REPR 9;
CHAR lf = REPR 10;
CHAR cr = REPR 13;
# returns a printable copy of s: each nul, tab, cr and lf character is #
# replaced by its name in angle brackets, everything else is copied    #
PROC show ctrl = ( STRING s )STRING:
     BEGIN
         STRING shown := "";
         FOR pos FROM LWB s TO UPB s DO
             CHAR c = s[ pos ];
             IF   c = nul THEN shown +:= "<nul>"
             ELIF c = tab THEN shown +:= "<tab>"
             ELIF c = cr  THEN shown +:= "<cr>"
             ELIF c = lf  THEN shown +:= "<lf>"
             ELSE              shown +:= c
             FI
         OD;
         shown
     END # show ctrl # ;
# returns csv converted to TSV #
# csv is handled as one CSV record: comma separated fields become tab #
# separated fields in the result and backslash, nul, cr, lf and tab   #
# characters found inside a field are written in escaped form by add  #
PROC csv2tsv = ( STRING csv )STRING:
BEGIN
# set to TRUE by next when it is called with no characters left in csv #
BOOL at end := FALSE;
# the current character - nul until next is first called and again at the end #
CHAR ch := nul;
# sets ch to the next character in csv, if there is one #
# otherwise at end is set and ch becomes nul #
# c pos and c max are declared further down, before the first call of next #
PROC next = VOID: ch := IF c pos < c max
THEN csv[ c pos +:= 1 ]
ELSE at end := TRUE
; nul
FI;
# skips over spaces and returns the count of skipped spaces #
# on return ch is the first non-space character, unless at end is set #
PROC spaces = INT:
BEGIN
INT s count := 0;
WHILE NOT at end AND ch = " " DO s count +:= 1; next OD;
s count
END # spaces # ;
# adds ch to the TSV - converting some characters to escaped form #
# a backslash is doubled; nul, cr, lf and tab become a backslash followed #
# by 0, r, n and t respectively; any other character is appended as is   #
# tsv is declared further down, before the first call of add #
PROC add = VOID: tsv +:= IF ch = "\" THEN "\\"
ELIF ch = nul THEN "\0"
ELIF ch = cr THEN "\r"
ELIF ch = lf THEN "\n"
ELIF ch = tab THEN "\t"
ELSE ch
FI;
# parse the csv and generate the tsv #
STRING tsv := "";
# c pos starts one before the first character so the first next yields csv[ LWB csv ] #
INT c pos := LWB csv - 1;
INT c max = UPB csv;
# each iteration handles one field - at the top ch is either the initial nul #
# or the comma that ended the previous field, so next steps past it          #
WHILE NOT at end DO
# spaces are not significant around quoted fields but are part of unquoted fields #
next;
# leading spaces are only counted here, they are emitted later if the field #
# turns out not to start with a quote                                       #
INT space count := spaces;
IF ch = """" THEN
# quoted field part #
# all the work is done in the loop condition: step past the opening quote, #
# copy characters up to the next quote, then look at the character after   #
# that quote - a second quote is an embedded quote, which is added and the #
# loop repeats to carry on copying the quoted text                         #
WHILE next;
WHILE NOT at end AND ch /= """" DO add; next OD;
IF NOT at end THEN
next;
IF ch = """" THEN
# embedded quote #
add
FI
FI;
NOT at end AND ch = """"
DO SKIP OD;
# this replaces the leading space count with the count of spaces after the closing quote #
space count := spaces;
IF at end OR ch = "," THEN
# nothing significant after the quoted field part #
space count := 0
FI
FI;
# unquoted field part #
# emit the counted spaces first - the leading spaces of an unquoted field or #
# the spaces between a quoted part and the text that follows it              #
tsv +:= space count * " ";
WHILE NOT at end AND ch /= "," DO add; next OD;
IF ch = "," THEN
# have another field following this one #
tsv +:= tab
FI
OD;
tsv
END # csv2tsv # ;
# the CSV records from the task description, control characters are built #
# from the nul, tab and cr constants as they cannot be typed directly     #
[]STRING tests =
    ( "a,""b""",
      """a"",""b""""c""",
      "",
      ",a",
      "a,""",
      " a , ""b""",
      """12"",34",
      "a" + tab + "b, That is a TAB character",
      "a\tb",
      "a\n\rb",
      "a" + nul + "b, That is a NUL character",
      "a" + cr + "b, Be sure to change RETURN to the '\r' control character (#xd)",
      "a\b"
    );
# show each record followed by its conversion, with control characters made visible #
FOR t pos FROM LWB tests TO UPB tests DO
    STRING record = tests[ t pos ];
    print( ( " {{", show ctrl( record ), "}}", newline ) );
    print( ( " -> {{", show ctrl( csv2tsv( record ) ), "}}", newline ) )
OD
END
</syntaxhighlight>
{{out}}
<pre>
{{a,"b"}}
-> {{a<tab>b}}
{{"a","b""c"}}
-> {{a<tab>b"c}}
{{}}
-> {{}}
{{,a}}
-> {{<tab>a}}
{{a,"}}
-> {{a<tab>}}
{{ a , "b"}}
-> {{ a <tab>b}}
{{"12",34}}
-> {{12<tab>34}}
{{a<tab>b, That is a TAB character}}
-> {{a\tb<tab> That is a TAB character}}
{{a\tb}}
-> {{a\\tb}}
{{a\n\rb}}
-> {{a\\n\\rb}}
{{a<nul>b, That is a NUL character}}
-> {{a\0b<tab> That is a NUL character}}
{{a<cr>b, Be sure to change RETURN to the '\r' control character (#xd)}}
-> {{a\rb<tab> Be sure to change RETURN to the '\\r' control character (#xd)}}
{{a\b}}
-> {{a\\b}}
</pre>
 
Line 227 ⟶ 371:
 
# A record: one `field`, then `star` applied to (a consumed "," followed by another `field`).
# NOTE(review): `field`, `star` and `consume` are defined outside this excerpt;
# `star` is presumably zero-or-more repetition -- confirm against its definition.
def record: field | star(consume(",") | field);
 
# Wrap the input as {remainder: .}, run it through `record`, and emit the `.result` it leaves.
def parse: {remainder: .} | record | .result;
 
def csv2tsv:
{remainder: .} | record | .result | @tsv ;
parse
| @tsv ;
 
# Transform an entire file assuming jq is invoked with the -n option
Line 238 ⟶ 379:
</syntaxhighlight>
{{output}}
As required:
* Backquotes are uniformly duplicated.
* Backslashes are uniformly duplicated.
* Until recently gojq did not handle NUL (#x0) properly.
 
Line 258 ⟶ 400:
end
 
const testfile = "test.tmp"
for test_string in [
fh = open(testfile, "w")
"""a,"b\"""",
 
"""\"a","b""c\"""",
write(fh, "","
"a,a",b"
"a",\"b""c",
 
" a , \"b\"",
,a
"""\"12",34""",
a,"
"a\tb,", # That is a TAB character
a , "b"
raw"a\tb", # That is not
"12",34
raw"a\n\rb",
a\tb, TAB
"a\0b", # That is a NUL character
a\\tb
"a\nb", # That is a LF (linefeed) character
a\\n\\rb
"a\rb", # That is a CR (carriage return) character
a\0b, NUL
raw"a\b"]
a\rb, RETURN
a\\b""")
 
close(fh)
 
for test_string in split(read(testfile, String), "\n")
csv, tsv = csv_tsv(test_string)
println(lpad(csv, 12), " => ", tsv)
Line 285 ⟶ 433:
a , "b" => a <TAB>b
"12",34 => 12<TAB>34
a\tb, TAB => a\tb<TAB> TAB
a\\tb => a\\tb
a\\n\\rb => a\\n\\rb
a\0b, NUL => a\0b<TAB> NUL
a\rb, RETURN => a\rb<TAB> RETURN
a\nb => a\nb
a\rb => a\rb
a\\b => a\\b
</pre>
Line 349 ⟶ 496:
{{libheader|Wren-str}}
Backslashes are only duplicated for escaped \t, \n and \r.
<syntaxhighlight lang="wren">import "./ioutil" for FileUtil
import "./str" for Str
 
9,488

edits