Read a file line by line

From Rosetta Code
Task
Read a file line by line
You are encouraged to solve this task according to the task description, using any language you may know.

The task is to demonstrate how to read a file line by line (as opposed to reading the entire file).

See also: Input loop.

Brat

<lang brat>include :file

# Call the block once per line of foobar.txt; the block prints the line.
file.each_line "foobar.txt" { line |

 p line

}</lang>

C

This is not easy to do, because the C library is so primitive. There is fgets(), but this function limits the length of a line. fgets() also loses characters if there is a NUL character '\0' in the middle of a line.

The next example uses fgetln() and err() from BSD, but will not work with most other systems.

Works with: OpenBSD version 4.8

<lang c>#include <err.h> /* err */

#include <stdio.h> /* fopen, fgetln, fputs, fwrite */

/*
 * Read a file line by line.
 * http://rosettacode.org/wiki/Read_a_file_line_by_line
 *
 * Opens foobar.txt and echoes every line to stdout, prefixed with
 * "LINE: ". Exits with status 1 through err() if the file cannot be
 * opened or if reading stops for any reason other than end of file.
 */
int
main(void)
{
    FILE *f;
    size_t len;
    char *line;

    f = fopen("foobar.txt", "r");
    if (f == NULL)
        err(1, "foobar.txt");

    /*
     * This loop reads each line.
     * Remember that line is not a C string.
     * There is no terminating '\0'.
     */
    while ((line = fgetln(f, &len)) != NULL) {
        /*
         * Do something with line.
         */
        fputs("LINE: ", stdout);
        fwrite(line, len, 1, stdout);
    }

    /* The loop ends on NULL; anything other than end of file is an error. */
    if (!feof(f))
        err(1, "fgetln");

    /* Release the stream; the original left it open until process exit. */
    fclose(f);
    return 0;
}</lang>

For other systems, you can code something like fgetln(). The next example refactors the code from Synchronous concurrency#C that reads lines.

<lang c>#include <stdlib.h> /* exit, malloc, realloc, free */

#include <stdio.h> /* fopen, fgetc, fputs, fwrite */

/*
 * State of a line reader. All members are private: use lr_init(),
 * next_line() and lr_free() instead of touching them directly.
 */
struct line_reader {
    FILE *f;     /* stream the lines are read from */
    char *buf;   /* growable line buffer; NULL until first use */
    size_t siz;  /* allocated size of buf, in bytes */
};

/*
 * Initializes a line reader _lr_ for the stream _f_.
 * No memory is allocated until the first call to next_line().
 */
void
lr_init(struct line_reader *lr, FILE *f)
{
    lr->f = f;
    lr->buf = NULL;
    lr->siz = 0;    /* keep every member in a defined state */
}

/*
 * Reads the next line. If successful, returns a pointer to the line,
 * and sets *len to the number of characters, at least 1. The result is
 * _not_ a C string; it has no terminating '\0'. The returned pointer
 * remains valid until the next call to next_line() or lr_free() with
 * the same _lr_.
 *
 * If not successful, returns NULL. This can mean an end of file,
 * an error on the stream, or an error of memory allocation (including
 * a line too long for the buffer size to be represented in a size_t).
 */
char *
next_line(struct line_reader *lr, size_t *len)
{
    enum { INITIAL_SIZE = 4096 };
    size_t newsiz;
    int c;
    char *newbuf;

    if (lr->buf == NULL) {
        /* First use, or first use after lr_free(): allocate the buffer. */
        lr->buf = malloc(INITIAL_SIZE);
        if (lr->buf == NULL)
            return NULL;
        lr->siz = INITIAL_SIZE;
    }

    *len = 0;   /* Start with empty line. */
    for (;;) {
        c = fgetc(lr->f);   /* Read next character. */
        if (ferror(lr->f))
            return NULL;

        if (c == EOF) {
            /*
             * End of file is also end of line,
             * unless the line would be empty.
             */
            return (*len == 0) ? NULL : lr->buf;
        }

        if (*len == lr->siz) {
            /*
             * Need a bigger buffer. Refuse to double when the new
             * size would wrap around, because a wrapped (smaller)
             * realloc() would let the store below overflow the buffer.
             */
            if (lr->siz > (size_t)-1 / 2)
                return NULL;
            newsiz = lr->siz * 2;
            newbuf = realloc(lr->buf, newsiz);
            if (newbuf == NULL)
                return NULL;    /* lr->buf is still valid and owned by lr */
            lr->buf = newbuf;
            lr->siz = newsiz;
        }
        lr->buf[(*len)++] = (char)c;

        /* '\n' is end of line. */
        if (c == '\n')
            return lr->buf;
    }
}

/*
 * Frees memory used by _lr_. The reader may be used again afterwards;
 * next_line() allocates a fresh buffer on demand.
 */
void
lr_free(struct line_reader *lr)
{
    free(lr->buf);
    lr->buf = NULL;
    lr->siz = 0;
}

/*
 * Read a file line by line.
 * http://rosettacode.org/wiki/Read_a_file_line_by_line
 *
 * Opens foobar.txt and echoes every line to stdout, prefixed with
 * "LINE: ". Returns 0 on success; exits or returns with status 1 if the
 * file cannot be opened or if reading stops before end of file.
 */
int
main(void)
{
    struct line_reader lr;
    FILE *f;
    size_t len;
    char *line;
    int failed;

    f = fopen("foobar.txt", "r");
    if (f == NULL) {
        perror("foobar.txt");
        exit(1);
    }

    /*
     * This loop reads each line.
     * Remember that line is not a C string.
     * There is no terminating '\0'.
     */
    lr_init(&lr, f);
    while ((line = next_line(&lr, &len)) != NULL) {
        /*
         * Do something with line.
         */
        fputs("LINE: ", stdout);
        fwrite(line, len, 1, stdout);
    }

    /*
     * next_line() returns NULL at end of file and on failure alike.
     * Report a failure before cleaning up, so that errno is not
     * disturbed by free() or fclose().
     */
    failed = !feof(f);
    if (failed)
        perror("next_line");

    /* Release the buffer and the stream on both paths. */
    lr_free(&lr);
    fclose(f);

    return failed ? 1 : 0;
}</lang>

D

<lang d>import std.stdio;

void main() {

   // byLine() strips the line terminator by default, so writeln() is
   // needed to put each line back on its own output line.
   foreach (line; File("foobar.txt").byLine())
       writeln(line);

}</lang> The File is managed by reference count, and it gets closed when it gets out of scope or it changes. The 'line' is a char[] whose buffer is reused on each iteration (the newline is stripped by default), so if you need a string you have to idup it.

J

J currently discourages this "read just one line" approach. In addition to the arbitrary character of lines, there are issues of problem size and scope (what happens when you have a billion characters between your newline delimiters?). Usually, it's easier to just read the entire file, or memory map the file, and when files are so large that that is not practical it's probably better to put the programmer in explicit control of issues like block sizes and exception handling.

This implementation looks for lines separated by ascii character 10. Lines returned here do not include the line separator character. Files with no line-separating character at the end are treated as well formed -- if the last character of the file is the line separator that means that you have an empty line at the end of the file.

This implementation does nothing special when dealing with multi-gigabyte lines. If you encounter an excessively large line and if you do not have enough physical memory, your system will experience heavy memory pressure. If you also do not have enough virtual memory to hold a line you will get an out of memory exception.

<lang j>cocurrent 'linereader'

 NB. configuration parameter
 blocksize=: 400000
 NB. implementation
 offset=: 0
 position=: 0
 NB. buffer holds the unfinished tail of the text read so far;
 NB. lines holds the boxed lines that are ready to be returned.
 buffer=: ''
 lines=: ''
 NB. create y: remember the file name and its size (1!:4), and split
 NB. the file into boxed (offset, length) pairs of at most blocksize bytes.
 create=:3 :0
   name=:boxxopen y
   size=:1!:4 name
   blocks=:2 <@(-~/\)\ ~. size <. blocksize * i. 1 + >. size % blocksize
 )
 NB. readblocks '': read further blocks (indexed read, 1!:11) until the
 NB. newly read text contains LF, then re-cut the buffer into lines.
 NB. Does nothing when no blocks remain or more than one line is queued.
 readblocks=:3 :0
    if.0=#blocks do.return.end.
    if.1<#lines do.return.end.
    whilst.-.LF e.chars do.
      orig=:buffer=:buffer,chars=.1!:11 name,{.blocks
      blocks=:}.blocks
      lines=:<;._2 buffer,LF
    end.
    buffer=:_1{::lines
 )
 NB. next '': return the next line, without its LF, and drop it from
 NB. the queue, reading more of the file first when needed.
 next=:3 :0
   while.(#blocks)*.2>#lines do. readblocks '' end.
   r=.0{::lines
   lines=:}.lines
   r
 )</lang>

<lang j> example=: '/tmp/example.txt' conew 'linereader'

  next__example ''

this is line 1

  next__example ''

and this is line 2</lang>

Perl

For the simple case of iterating over the lines of a file you can do: <lang perl>open(FOO, '<', 'foobar.txt') or die $!; while (<FOO>) { # each line is stored in $_, with terminating newline

   # If open() fails, die reports the system error held in $!.
   chomp; # chomp, short for chomp($_), removes the terminating newline
   process($_); # process() is not defined here; it stands in for the per-line work

} close(FOO);</lang> The angle bracket operator < > reads a filehandle line by line. (The angle bracket operator can also be used to open and read from files that match a specific pattern, by putting the pattern in the brackets.)

Without specifying the variable that each line should be put into, it automatically puts it into $_, which is also conveniently the default argument for many Perl functions. If you wanted to use your own variable, you can do something like this: <lang perl>open(FOO, '<', 'foobar.txt') or die $!; while (my $line = <FOO>) {

   chomp($line); # remove the terminating newline from $line
   process($line); # pass the named variable: this loop does not set $_

} close(FOO);</lang>

The special use of the angle bracket operator with nothing inside, will read from all files whose names were specified on the command line: <lang perl>while (<>) {

   chomp; # removes the terminating newline from $_
   process($_); # process() is not defined here; it stands in for the per-line work

}</lang>

Python

For the simple case of iterating over the lines of a file you can do: <lang python>with open("foobar.txt") as f:

   # Iterating over the file object hands back one line at a time.
   for line in f:
       process(line)</lang>

The with statement ensures the correct closing of the file after it is processed, and iterating over the file object f adjusts what is considered the line separator character(s), so the code will work on multiple operating systems such as Windows, Mac, and Solaris without change.

Python also has the fileinput module. This can process multiple files parsed from the command line and can be set to modify files 'in-place'. <lang python>import fileinput for line in fileinput.input():

   process(line)

</lang>

Ruby

<lang ruby>IO.foreach "foobar.txt" do |line|

 puts line

end</lang>

Tcl

<lang tcl>set f [open "foobar.txt"]

# gets stores the next line (without its newline) in "line" and returns
# its length, or -1 once the end of the file has been reached.
while {[gets $f line] >= 0} {

   # This loops over every line
   puts ">>$line<<"

}
close $f</lang>

TUSCRIPT

<lang tuscript> $$ MODE TUSCRIPT

datei="rosetta.txt"

- NOTE(review): presumably opens $datei for record-wise UTF-8 reading under the handle q, delivering each record in "line" -- confirm against the TUSCRIPT manual
ACCESS q: READ/RECORDS/UTF8 $datei s,line

LOOP
- NOTE(review): presumably fetches the next record and leaves the loop when none remain -- confirm
READ/NEXT/EXIT q
PRINT line
ENDLOOP

ENDACCESS q </lang> or: <lang tuscript> LOOP line=datei

PRINT line

ENDLOOP </lang>

UNIX Shell

Works with: sh
Works with: bash
Works with: pdksh

<lang sh>cat foobar.txt | while read line ; do

 # This loop repeats for each line of the file
 echo "$line"

done</lang>

Visual Basic

<lang vb>' Read lines from a file ' ' (c) Copyright 1993 - 2011 Mark Hobley ' ' This code was ported from an application program written in Microsoft Quickbasic ' ' This code can be redistributed or modified under the terms of version 1.2 of ' the GNU Free Documentation Licence as published by the Free Software Foundation.

Sub readlinesfromafile()

 ' Prints every line of foobar.txt.
 ' Sets flg.abort to "Y" and returns early if the file cannot be opened,
 ' does not exist, or a read fails. readlinefromfile is not defined in
 ' this listing; presumably it fills message$ and sets flg.endoffile.
 var.filename = "foobar.txt"
 var.filebuffersize = ini.inimaxlinelength
 Call openfileread
 If flg.error = "Y" Or flg.exists <> "Y" Then
   flg.abort = "Y"
   Exit Sub
 End If

 Do
   Call readlinefromfile
   If flg.error = "Y" Then
     flg.abort = "Y"
     Call closestream
     ' closestream clears flg.error, so raise it again for the caller
     flg.error = "Y"
     Exit Sub
   End If
   ' End of file reached: leave the loop and close the file
   If flg.endoffile = "Y" Then Exit Do
   ' We have a line from the file
   Print message$
 Loop

 Call closestream

End Sub

Sub openfileread()

 ' Opens var.filename for sequential input on a free stream number.
 ' On return flg.streamopen is "Y" only if the Open statement completed;
 ' flg.exists and flg.error report what checkfileexists and
 ' getfreestream found.
 flg.streamopen = "N"
 Call checkfileexists
 If flg.error = "Y" Then Exit Sub
 If flg.exists <> "Y" Then Exit Sub
 Call getfreestream
 If flg.error = "Y" Then Exit Sub
 ' Context that errorhandler puts into its message
 var.errorsection = "Opening File"
 var.errordevice = var.filename
 ' Error trapping is only installed when the ini setting asks for it
 If ini.errortrap = "Y" Then
   On Local Error GoTo openfilereaderror
 End If
 flg.endoffile = "N"
 Open var.filename For Input As #var.stream Len = var.filebuffersize
 flg.streamopen = "Y"
 Exit Sub

openfilereaderror:

 var.errorcode = Err
 Call errorhandler
 ' Resume re-executes the statement that raised the error.
 ' NOTE(review): if Open keeps failing this retries without limit; the
 ' original author flagged the line with '!!
 resume '!!

End Sub

Public Sub checkfileexists()

 ' Sets flg.exists to "Y" when Dir$ finds var.filename, otherwise "N".
 ' Context that errorhandler puts into its message
 var.errorsection = "Checking File Exists"
 var.errordevice = var.filename
 ' Error trapping is only installed when the ini setting asks for it
 If ini.errortrap = "Y" Then
   On Local Error GoTo checkfileexistserror
 End If
 flg.exists = "N"
 ' Dir$ returns an empty string when nothing matches the name
 If Dir$(var.filename, 0) <> "" Then
   flg.exists = "Y"
 End If
 Exit Sub

checkfileexistserror:

 ' No Resume here: after reporting the error the Sub simply ends
 var.errorcode = Err
 Call errorhandler

End Sub

Public Sub getfreestream()

 ' Stores the next unused file number, as returned by FreeFile, in var.stream.
 ' Context that errorhandler puts into its message
 var.errorsection = "Opening Free Data Stream"
 var.errordevice = ""
 ' Error trapping is only installed when the ini setting asks for it
 If ini.errortrap = "Y" Then
   On Local Error GoTo getfreestreamerror
 End If
 var.stream = FreeFile
 Exit Sub

getfreestreamerror:

 var.errorcode = Err
 Call errorhandler
 ' Resume re-executes the statement that raised the error.
 ' NOTE(review): this retries without limit if FreeFile keeps failing;
 ' the original author flagged the line with '!!
 resume '!!

End Sub

Sub closestream()

 ' Closes stream #var.stream and sets flg.streamopen to "N".
 ' A failure while closing is reported by errorhandler and then
 ' discarded: flg.error is "N" on return from the normal path.
 If ini.errortrap = "Y" Then
   On Local Error GoTo closestreamerror
 End If
 var.errorsection = "Closing Stream"
 var.errordevice = ""
 ' Tells errorhandler to leave flg.error at "Y", because the handler
 ' below continues with Resume Next instead of retrying
 flg.resumenext = "Y"
 Close #var.stream
 If flg.error = "Y" Then
   flg.error = "N"
   '!! Call unexpectederror
 End If
 flg.streamopen = "N"
 Exit Sub

closestreamerror:

 var.errorcode = Err
 Call errorhandler
 ' Continue with the statement after the failed Close
 resume next

End Sub

Public Sub errorhandler()

 ' Shows a message box built from var.errorsection, the optional
 ' var.errordevice and var.errorcode, then sets flg.error for the caller.
 tmp2$ = btrim$(var.errordevice)
 tmp$ = btrim$(var.errorsection)
 If tmp2$ <> "" Then tmp$ = tmp$ + " (" + tmp2$ + ")"
 tmp$ = tmp$ + " : " + Str$(var.errorcode)
 tmp1% = MsgBox(tmp$, 0, "Error!")

 If flg.resumenext = "Y" Then
   ' The caller will Resume Next: consume the request and leave the
   ' error flagged
   flg.resumenext = "N"
   flg.error = "Y"
 Else
   ' The caller will Resume (retry), so the error is not left flagged
   flg.error = "N"
 End If

End Sub

Public Function btrim$(arg$)

 ' Returns arg$ with leading and trailing spaces removed.
 Dim leftTrimmed As String
 leftTrimmed = LTrim$(arg$)
 btrim$ = RTrim$(leftTrimmed)

End Function</lang>