Summary: Find and print the mentions of a given string in the recent chat logs from a chatroom. Only use your programming language's standard library.

Task
Retrieve and search chat history
You are encouraged to solve this task according to the task description, using any language you may know.
Task

Details:

The Tcl Chatroom is an online chatroom. Its conversations are logged. It is useful to know if someone has mentioned you or your project in the chatroom recently. You can find this out by searching the chat logs. The logs are publicly available at http://tclers.tk/conferences/tcl/. One log file corresponds to the messages from one day in Germany's current time zone. Each chat log file has the name YYYY-MM-DD.tcl where YYYY is the year, MM is the month and DD the day. The logs store one message per line. The messages themselves are human-readable and their internal structure doesn't matter.

Retrieve the chat logs from the last 10 days via HTTP. Find those lines that include a particular substring and print them in the following format:

<log file URL>
------
<matching line 1>
<matching line 2>
...
<matching line N>
------

The substring will be given to your program as a command line argument.

You need to account for the possible time zone difference between the client running your program and the chat log writer on the server to not miss any mentions. (For example, if you generated the log file URLs naively based on the local date, you could miss mentions if it was already April 5th for the logger but only April 4th for the client.) What this means in practice is that you should either generate the URLs in the time zone Europe/Berlin or, if your language can not do that, add an extra day (today + 1) to the range of dates you check, but then make sure to not print parts of a "not found" page by accident if a log file doesn't exist yet.

The code should be contained in a single-file script, with no "project" or "dependency" file (e.g., no requirements.txt for Python). It should only use a given programming language's standard library to accomplish this task and not rely on the user having installed any third-party packages.

If your language does not have an HTTP client in the standard library, you can speak raw HTTP 1.0 to the server. If it can't parse command line arguments in a standalone script, read the string to look for from the standard input.

Elixir

<lang elixir>#! /usr/bin/env elixir
defmodule Mentions do
  @moduledoc """
  Helpers for downloading Tcl chatroom logs and finding the lines that
  mention a given string.
  """

  @seconds_per_day 60 * 60 * 24

  @doc """
  Downloads `url` over HTTP.

  Returns `{:ok, body}` with the raw response body as a binary, or
  `{:error, reason}` when the request fails, the server answers with a status
  other than 200, or the body is the server's "URL Not Found" page.
  """
  def get(url) do
    request = {String.to_charlist(url), []}

    # Ask for the body as a binary: the default is a list of bytes, and turning
    # that into a string would re-encode every byte above 127, so UTF-8 text in
    # the log would no longer match a UTF-8 needle.
    case :httpc.request(:get, request, [], body_format: :binary) do
      {:ok, {{_, 200, _}, _headers, body}} ->
        if not_found_page?(body) do
          {:error, "log file not found"}
        else
          {:ok, body}
        end

      {:ok, {{_, status, _}, _headers, _body}} ->
        {:error, "unexpected HTTP status #{status}"}

      {:error, reason} ->
        {:error, reason}
    end
  end

  @doc """
  Returns the lines of `haystack` (split on `"\\n"`) that contain `needle`.
  """
  def perg(haystack, needle) do
    for line <- String.split(haystack, "\n"), String.contains?(line, needle), do: line
  end

  @doc """
  Builds the log file URL for the UTC date `n` days from now (`n` may be
  negative).
  """
  def generate_url(n) do
    date_str =
      DateTime.utc_now()
      |> DateTime.to_unix()
      |> Kernel.+(n * @seconds_per_day)
      |> DateTime.from_unix!()
      |> DateTime.to_date()
      |> Date.to_iso8601()

    "http://tclers.tk/conferences/tcl/#{date_str}.tcl"
  end

  # The server can answer a request for a missing log with an HTML error page;
  # this recognises that page so it is never searched for mentions.
  defp not_found_page?(body) do
    Regex.match?(~r|<!Doctype HTML.*<Title>URL Not Found</Title>|s, body)
  end
end

# The string to look for is the script's only command line argument.
[needle] = System.argv()

# :httpc is part of the :inets application, which has to be started before
# the first request is made.
{:ok, _} = Application.ensure_all_started(:inets)

back = 10

# Elixir does not come standard with time zone definitions, so we add an extra
# day to account for the possible difference between the local and the server
# time.
for i <- -back..1 do
  url = Mentions.generate_url(i)

  with {:ok, haystack} <- Mentions.get(url),
       # If the result is a non-empty list...
       [_ | _] = mentions <- Mentions.perg(haystack, needle) do
    IO.puts("#{url}\n------\n#{Enum.join(mentions, "\n")}\n------\n")
  end
end</lang>


F#

<lang fsharp>#!/usr/bin/env fsharpi
/// Time zone used to build the log file names. The Windows identifier is
/// tried first; when the runtime does not know it, the IANA identifier is
/// used instead.
let server_tz =
    try
        // CLR on Windows
        System.TimeZoneInfo.FindSystemTimeZoneById("W. Europe Standard Time")
    with
        // Mono
        | :? System.TimeZoneNotFoundException ->
            System.TimeZoneInfo.FindSystemTimeZoneById("Europe/Berlin")

/// Downloads `url` and returns the whole response body as one string.
let get url =
    let request = System.Net.WebRequest.Create(System.Uri(url))
    use response = request.GetResponse()
    use body = response.GetResponseStream()
    use text = new System.IO.StreamReader(body)
    text.ReadToEnd()

/// Returns, in order, the '\n'-separated lines of `haystack` that contain
/// `needle`.
let grep needle (haystack : string) =
    [ for line in haystack.Split('\n') do
        if line.Contains(needle) then
            yield line ]

/// Builds the log file URL for the day `n` days from now (`n` may be
/// negative), using the calendar date in the server's time zone.
let genUrl n =
    let day = System.DateTime.UtcNow.AddDays(float n)
    let server_dt = System.TimeZoneInfo.ConvertTimeFromUtc(day, server_tz)
    // Format with the invariant culture: the current culture may use a
    // non-Gregorian calendar, which would put the wrong year in the file name.
    let timestamp =
        server_dt.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture)
    sprintf "http://tclers.tk/conferences/tcl/%s.tcl" timestamp

// Entry point: expects exactly one argument after the script name, the
// string to search for. Anything else prints a usage line and exits with 1.
let _ =
    match fsi.CommandLineArgs with
    | [| _; needle |] ->
        let back = 10
        // Walk from `back` days ago up to today, printing one section per
        // log file that has at least one matching line.
        seq { -back .. 0 }
        |> Seq.iter (fun offset ->
            let url = genUrl offset
            let found = String.concat "\n" (grep needle (get url))
            if found <> "" then
                printfn "%s\n------\n%s\n------\n" url found)
    | args ->
        printfn "Usage: %s literal" args.[0]
        System.Environment.Exit(1)</lang>

Python

<lang python>#! /usr/bin/env python3 import datetime import re import urllib.request import sys

def get(url):
    """Download ``url`` and return its body as text.

    Returns ``None`` when the log does not exist, i.e. when the server
    answers with HTTP 404 or with its HTML "URL Not Found" page. Other HTTP
    and network errors propagate to the caller.
    """
    import urllib.error

    try:
        with urllib.request.urlopen(url) as response:
            # Do not let one badly encoded chat line abort the whole search.
            html = response.read().decode('utf-8', errors='replace')
    except urllib.error.HTTPError as err:
        # A log that has not been created yet may be reported with a real
        # 404 status instead of a 200 response carrying the error page.
        if err.code == 404:
            return None
        raise
    if re.match(r'<!Doctype HTML[\s\S]*<Title>URL Not Found</Title>', html):
        return None
    return html

def main():
    """Print every recent chat log line that contains the given substring.

    The substring is the first command line argument. Logs from 10 days ago
    up to tomorrow (UTC) are fetched; a log is printed only when it exists
    and has at least one matching line.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: {} literal'.format(sys.argv[0]))

    template = 'http://tclers.tk/conferences/tcl/%Y-%m-%d.tcl'
    today = datetime.datetime.now(datetime.timezone.utc)
    back = 10
    needle = sys.argv[1]
    # Since Python does not come standard with time zone definitions, add an
    # extra day to account for the possible difference between the local and the
    # server time.
    for i in range(-back, 2):
        day = today + datetime.timedelta(days=i)
        url = day.strftime(template)
        haystack = get(url)
        # get() returns None for a log that does not exist (yet).
        if haystack:
            mentions = [x for x in haystack.split('\n') if needle in x]
            if mentions:
                print('{}\n------\n{}\n------\n'
                      .format(url, '\n'.join(mentions)))

main()</lang>

Ruby

<lang ruby>#! /usr/bin/env ruby require 'net/http' require 'date'

# Prints every recent chat log line that contains the substring given as the
# first command line argument, grouped by log file URL.
def main
  template = 'http://tclers.tk/conferences/tcl/%Y-%m-%d.tcl'
  not_found = /<!Doctype HTML.*<Title>URL Not Found<\/Title>/m
  back = 10
  needle = ARGV[0]
  abort "Usage: #{$PROGRAM_NAME} literal" if needle.nil?

  today = Date.today
  # Ruby's standard library has no time zone database, so one extra day
  # (today + 1) is checked to cover the possible difference between the local
  # date and the date on the log server.
  (-back..1).each do |i|
    url = (today + i).strftime(template)
    response = Net::HTTP.get_response(URI(url))
    # A log that does not exist yet must not be searched.
    next unless response.is_a?(Net::HTTPSuccess)

    # Net::HTTP returns a binary string; treat it as UTF-8 (replacing invalid
    # bytes) so that a non-ASCII needle can be compared against it.
    body = response.body.to_s.force_encoding(Encoding::UTF_8).scrub
    next if body =~ not_found

    mentions = body.split("\n").select { |line| line.include?(needle) }
    next if mentions.empty?

    puts "#{url}\n------\n#{mentions.join("\n")}\n------\n"
  end
end

main</lang>


Scala

<lang scala>import java.net.Socket import java.net.URL import java.time import java.time.format import java.util.Scanner import scala.collection.JavaConverters._

/** Fetches `rawUrl` with a raw HTTP/1.0 request and returns the lines of the
  * response body. Returns an empty list when the status code is not 200.
  */
def get(rawUrl: String): List[String] = {
    val url = new URL(rawUrl)
    val port = if (url.getPort > -1) url.getPort else 80
    // getPath already starts with '/', so it must not be prefixed again.
    val path = if (url.getPath.isEmpty) "/" else url.getPath
    val sock = new Socket(url.getHost, port)
    try {
        sock.getOutputStream.write(
            s"GET $path HTTP/1.0\r\nHost: ${url.getHost}\r\n\r\n".getBytes("UTF-8")
        )
        // Read everything before the socket is closed in the finally block.
        val lines = new Scanner(sock.getInputStream, "UTF-8")
            .useDelimiter("\n").asScala.toList
        // Status line looks like "HTTP/1.1 200 OK".
        val ok = lines.headOption.exists(_.split(" ").drop(1).headOption.contains("200"))
        // Headers end at the first blank line; only the body holds chat lines.
        if (ok) lines.dropWhile(_.trim.nonEmpty).drop(1) else Nil
    } finally {
        sock.close()
    }
}

/** Builds the log file URL for the day `n` days from now (`n` may be
  * negative), using the calendar date in the Europe/Berlin time zone, which
  * is the zone the log files are named in.
  */
def genUrl(n: Long) = {
    val date = java.time.ZonedDateTime
        .now(java.time.ZoneId.of("Europe/Berlin"))
        .plusDays(n)
        .format(java.time.format.DateTimeFormatter.ISO_LOCAL_DATE)
    s"http://tclers.tk/conferences/tcl/$date.tcl"
}

// Search the logs from `back` days ago up to today and print one section per
// log file that has at least one line containing the literal.
val back = 10
val literal = args(0)
for (i <- -back to 0) {
    val url = genUrl(i)
    print(get(url).filter(_.contains(literal)) match {
        case List() => ""
        case x => s"$url\n------\n${x.mkString("\n")}\n------\n\n"
    })
}</lang>


Tcl

Tcl 8.5+

<lang tcl>#! /usr/bin/env tclsh package require http

# Fetches url with the http package and returns the response body. The
# transaction token is released before returning.
proc get url {
    set token [::http::geturl $url]
    set body [::http::data $token]
    ::http::cleanup $token
    return $body
}

# Returns the list of lines in haystack that contain needle.
#
# haystack is split on newlines and searched with a glob pattern of the form
# *needle*. Before the pattern is built, the characters that are special in
# glob patterns (star, question mark, backslash and both square brackets) are
# each prefixed with a backslash, so needle is matched literally.
proc grep {needle haystack} {

    # -all and -inline make lsearch return every matching element itself
    # rather than the index of the first match.
    lsearch -all \
            -inline \
            -glob \
            [split $haystack \n] \
            *[string map {* \\* ? \\? \\ \\\\ [ \\[ ] \\]} $needle]*

}

# Searches the chat logs from 10 days ago up to today for the first element
# of argv and prints one section per log file that has matching lines. File
# names are generated in the Europe/Berlin time zone.
proc main argv {
    lassign $argv needle
    set urlTemplate http://tclers.tk/conferences/tcl/%Y-%m-%d.tcl
    set back 10
    set now [clock seconds]
    set offset -$back
    while {$offset <= 0} {
        set day [clock add $now $offset days]
        set url [clock format $day -format $urlTemplate -timezone :Europe/Berlin]
        set found [grep $needle [get $url]]
        if {$found ne {}} {
            puts $url\n------\n[join $found \n]\n------\n
        }
        incr offset
    }
}

main $argv</lang>

Jim Tcl

<lang tcl>#! /usr/bin/env jimsh
# Fetches url with a raw HTTP/1.0 request and returns the complete response
# (status line, headers and body). Raises an error when the URL can not be
# parsed or when the response is the server's "URL Not Found" page.
proc get url {
    # The port is captured without its leading colon so that it can be
    # appended to the host directly; the path keeps its leading slash.
    if {![regexp {http://([a-z.]+)(:([0-9]+))?(/.*)} $url _ host _ port path]} {
        error "can't parse URL \"$url\""
    }
    if {$port eq {}} { set port 80 }
    set ch [socket stream $host:$port]
    # HTTP lines end in CRLF; the Host header lets name-based virtual hosts
    # pick the right site.
    puts -nonewline $ch "GET $path HTTP/1.0\r\nHost: $host\r\n\r\n"
    flush $ch
    set content [read $ch]
    # Close before the not-found check so the channel is released on the
    # error path as well.
    close $ch
    if {[regexp {^HTTP[^<]+<!Doctype HTML.*<Title>URL Not Found</Title>} \
                $content]} {
        error {log file not found}
    }
    return $content
}

# Returns every line of haystack in which needle occurs.
#
# The lines are obtained by splitting haystack on newlines. The search uses a
# glob pattern with a star on either side of needle; glob metacharacters that
# occur in needle (star, question mark, backslash, opening and closing square
# bracket) are backslash-escaped first so that they only match themselves.
proc grep {needle haystack} {

    # With -all -inline the result is the list of matching lines, not an index.
    lsearch -all \
            -inline \
            -glob \
            [split $haystack \n] \
            *[string map {* \\* ? \\? \\ \\\\ [ \\[ ] \\]} $needle]*

}

# Searches the chat logs from 10 days ago up to tomorrow for the first
# element of argv and prints one section per log file that has matching
# lines. Any error raised while fetching or searching a log skips that log.
proc main argv {
    lassign $argv needle
    set urlTemplate http://tclers.tk/conferences/tcl/%Y-%m-%d.tcl
    set back 10
    set now [clock seconds]
    # Jim Tcl doesn't support time zones, so we add an extra day to account for
    # the possible difference between the local and the server time.
    set offset -$back
    while {$offset <= 1} {
        set day [expr {$now + $offset*60*60*24}]
        set url [clock format $day -format $urlTemplate]
        catch {
            set found [grep $needle [get $url]]
            if {$found ne {}} {
                puts $url\n------\n[join $found \n]\n------\n
            }
        }
        incr offset
    }
}

main $argv</lang>

zkl

<lang zkl>#<<<#
http://tclers.tk/conferences/tcl/:
2017-04-03.tcl  30610 bytes  Apr 03, 2017 21:55:37
2017-04-04.tcl  67996 bytes  Apr 04, 2017 21:57:01
...

Contents (eg 2017-01-19.tcl):
m 2017-01-19T23:01:02Z ijchain {*** Johannes13__ leaves}
m 2017-01-19T23:15:37Z ijchain {*** fahadash leaves}
m 2017-01-19T23:27:00Z ijchain {*** Buster leaves}
...
#<<<#

var [const] CURL=Import.lib("zklCurl")(); // libCurl instance

template:="http://tclers.tk/conferences/tcl/%4d-%02d-%02d.tcl";
ymd     :=Time.Clock.UTC[0,3];	// now, (y,m,d)
back    :=10;			// days in the past
needle  :=vm.nthArg(0);		// search string
foreach d in ([-back+1..0]){	// we want day -9,-8,-7..0 (today)
   date :=Time.Date.subYMD(ymd, 0,0,-d);   // date minus days
   url  :=template.fmt(date.xplode());
   haystack:=CURL.get(url);	// (request bytes, header length)
   haystack=haystack[0].del(0,haystack[1]);	// remove HTML header
   mentions:=haystack.filter("find",needle);	// search lines
   if(mentions) println("%s\n------\n%s------\n".fmt(url,mentions.text));
}</lang> While zkl supports TCP natively and talking simple HTTP is easy, Curl is way easier and fully supports the protocol.

Output:
$ zkl bbb suchenwi
http://tclers.tk/conferences/tcl/2017-04-24.tcl
------
m 2017-04-24T05:33:53Z {} {suchenwi has become available}
m 2017-04-24T06:38:31Z suchenwi {Hi Arjen - and bye. off to donuts}
m 2017-04-24T06:55:57Z {} {suchenwi has left}
...
------

http://tclers.tk/conferences/tcl/2017-04-30.tcl
...
------
http://tclers.tk/conferences/tcl/2017-05-01.tcl

------
...
http://tclers.tk/conferences/tcl/2017-05-03.tcl
------
m 2017-05-03T16:19:54Z {} {suchenwi has become available}
m 2017-05-03T16:20:40Z suchenwi {/me waves}
m 2017-05-03T16:21:57Z suchenwi {I'm on countdown at work: 17 work days to go...
...