Exercises in Programming Style

I’ve been working through the first few examples in Cristina Lopes’s book Exercises in Programming Style. This has been an excellent way of becoming familiar with the Go programming language, getting a stronger sense of why programming languages have developed in the way that they have and experiencing some of the advantages and drawbacks of working within a particular paradigm or type system. The following three styles are from the first section of the book on classic styles no longer commonly used.

Good Old Times

Constraints

  • Very small amount of primary memory, typically orders of magnitude smaller than the data that needs to be processed/generated.
  • No labels – i.e. no variable names or tagged memory addresses. All we have is memory that is addressable with numbers.

Exercises in Programming Style

Commentary

This is a slow, sprawling program that was stressful to write and is difficult to understand. This is the same program used in all the examples here and in Lopes’s book: It takes a file name as input, counts the occurences of each word (not including “stop words” such as ‘a’, ’to’, ‘your’, etc.) and outputs the top 25 most frequent.

This is the style of assembly programming and it doesn’t play well with a typed programming language such as Go which has pushed me to define multiple slices to represent “primary memory” (i.e. RAM and registers) where Lopes’s Python example uses a single list. “Secondary memory”, used to store the word frequencies, is a file.

The program is slow because of the number of times it needs to read from secondary memory and it is difficult to understand due to the absence of labels, which means no subroutines (barring Go’s standard library functions–it was too much not to use these) to structure the code and a demand that the person working with the program keep a symbol table in their head as they go, or else refer back to the comments again and again to remind themselves what a piece of data is that is being operated on.

Code

Good Old Times
package main

import (
	"bytes"
	"fmt"
	"os"
	"regexp"
	"slices"
	"strconv"
	"strings"
)

// data1[0] is the stop words
// data1[1] is for storing parts of the source text
// data1[2] is for storing word bytes
// data1[3] is for storing parts of the word frequencies file
// data1[4] is for storing a word from the word frequencies file
var data1 [5][]byte

// data2[0] is the no. of bytes read from the source text
// data2[1] is for storing the source text file offset
// data2[2] is a count value for iterating parts of the source text
// data2[3] is for storing a frequency count from the word frequencies file
var data2 [4]int

// data3 is for storing errors
var data3 [2]error

// data4[0] is a generic flag
// data4[1] is a flag indicating a word has been found in the word frequencies file
var data4 [2]bool

// data5[0] is for storing the most frequent words
// data5[1] is for storing the stop words as string
var data5 [2][]string

// data6[0] is for storing the top frequencies
var data6 [1][]int

func main() {
	data1[0] = make([]byte, 552)
	f, _ := os.Open("stop_words.txt")
	f.Read(data1[0])
	f.Close()

	data5[1] = strings.Split(string(data1[0]), ",")
	data1[0] = nil

	f, _ = os.Open(os.Args[1])
	defer f.Close()

	wfs, _ := os.Create("word_freqs")
	defer wfs.Close()

	data1[1] = make([]byte, 464)
	data1[2] = make([]byte, 20)
	data1[3] = make([]byte, 26)

	// Part 1: Get the frequencies.
	for {
		data2[0], data3[0] = f.Read(data1[1])

		if data2[0] < 464 {
			data1[1] = data1[1][:data2[0]]
		}

		for data2[2] < len(data1[1]) {
			data4[0], _ = regexp.Match(`[A-Za-z0-9]`, data1[1][data2[2]:data2[2]+1])

			if !data4[0] {
				// We've found a word. Skip if it's only 1 character or a stop word.
				data1[2] = bytes.ToLower(data1[2])
				if len(data1[2]) >= 2 && !slices.Contains(data5[1], string(data1[2])) {
					wfs.Seek(0, 0)
					data4[1] = false
					for {
						_, data3[1] = wfs.Read(data1[3])

						if data3[1] != nil {
							break
						} else {
							// Get the word and the current frequency.
							data1[4] = bytes.Trim(bytes.Split(data1[3], []byte(","))[0], " ")
							data2[3], _ = strconv.Atoi(string(bytes.TrimRight(bytes.TrimLeft(bytes.Split(data1[3], []byte(","))[1], "0"), "\n")))

							if bytes.Equal(data1[2], data1[4]) {
								// The word is already in the frequency list.
								wfs.Seek(-26, 1)
								fmt.Fprintf(wfs, "%20s,%04d\n", data1[2], data2[3]+1)
								data4[1] = true
								break
							}
						}
					}
					if !data4[1] {
						// Add a new word to the frequency list.
						fmt.Fprintf(wfs, "%20s,%04d\n", data1[2], 1)
					}
				}

				// Reset the captured word.
				data1[2] = nil
			} else {
				data1[2] = append(data1[2], data1[1][data2[2] : data2[2]+1][0])
			}

			data2[2]++
		}

		if data3[0] != nil {
			break
		}
		data2[2] = 0
	}

	// Part 2: Get the 25 most frequently occurring words.
	wfs.Seek(0, 0)
	data2[0] = 0
	data5[0] = make([]string, 26)
	data6[0] = make([]int, 26)
	for {
		_, data3[1] = wfs.Read(data1[3])

		if data3[1] != nil {
			break
		} else {
			data1[4] = bytes.Trim(bytes.Split(data1[3], []byte(","))[0], " ")
			data2[3], _ = strconv.Atoi(string(bytes.TrimRight(bytes.TrimLeft(bytes.Split(data1[3], []byte(","))[1], "0"), "\n")))

			for data2[0] < 26 {
				if data6[0][data2[0]] < data2[3] {
					data5[0] = slices.Insert(data5[0], data2[0], string(data1[4]))[:26]
					data6[0] = slices.Insert(data6[0], data2[0], data2[3])[:26]
					break
				}
				data2[0]++
			}
		}
		data2[0] = 0
	}

	data2[0] = 0
	for data2[0] < 25 {
		fmt.Println(data5[0][data2[0]], "-", data6[0][data2[0]])
		data2[0]++
	}
}

Go Forth

Constraints

  • Existence of an all-important data stack. All operations (conditionals, arithmetic, etc.) are done over data on the stack.
  • Existence of a heap for storing data that’s needed for later operations. The heap data can be associated with names (i.e. variables). As said above, all operations are done over data on the stack, so any heap data that needs to be operated upon needs to be moved first to the stack and eventually back to the heap.
  • Abstraction in the form of user-defined “procedures” (i.e. names bound to a set of instructions), which may be called something else entirely.

Exercises in Programming Style

Commentary

This is an improvement on the Good Old Times style insofar as the memory constraint is lifted and the use of subroutines adds structure. This style also does not play well with Go’s type system and I’ve had to make type assertions throughout the code. Although the stack and heap setup does make a separation between data and actions that may be useful for thinking about programs in general (the same separation is made in the functional programming paradigm), it’s still a game to have in mind what exactly is being operated on when the stack data is being accessed.

Code

Go Forth
package main

import (
  "bytes"
  "fmt"
  "os"
  "regexp"
  "slices"
  "strings"
)

var stack Stack
var heap Heap

type Stack []any
type Heap map[string]any
type Pair struct {
  Word string
  Freq int
}

func main() {
  stack = make([]any, 0)
  heap = make(map[string]any)

  stack.Push(os.Args[1])

  readFile()
  filterChars()
  scan()
  removeStopWords()
  frequencies()
  sort()

  for _, heap["pair"] = range stack.Pop().([]Pair) {
  	stack.Push(heap["pair"])
  	fmt.Printf("%-10v %v\n", stack.Peek().(Pair).Word, stack.Pop().(Pair).Freq)
  }
}

func (s *Stack) Pop() any {
  pop := (*s)[len((*s))-1]
  *s = (*s)[:len((*s))-1]
  return pop
}

func (s *Stack) Push(e ...any) {
  *s = append(*s, e...)
}

func (s Stack) Peek() any {
  return s[len(s)-1]
}

func (s *Stack) Extend(e []string) {
  extend := make([]any, len(e))
  for i, v := range e {
  	extend[i] = v
  }
  *s = append(*s, extend...)
}

func readFile() {
  f, _ := os.ReadFile(stack.Pop().(string))
  stack.Push(f)
}

func filterChars() {
  stack.Push(regexp.MustCompile(`[\W_]+`))
  stack.Push(stack.Pop().(*regexp.Regexp).ReplaceAll(stack.Pop().([]byte), []byte(" ")))
}

func scan() {
  stack.Extend(strings.Split(strings.ToLower(string(stack.Pop().([]byte))), " "))
}

func removeStopWords() {
  f, _ := os.ReadFile("stop_words.txt")
  stack.Push(strings.Split(string(bytes.Trim(f, "\n")), ","))

  heap["stop_words"] = stack.Pop()
  heap["words"] = make([]string, 0)

  for len(stack) > 0 {
  	if len(stack.Peek().(string)) < 2 ||
  		slices.Contains(heap["stop_words"].([]string), stack.Peek().(string)) {
  		stack.Pop()
  	} else {
  		heap["words"] = append(heap["words"].([]string), stack.Pop().(string))
  	}
  }

  stack.Extend(heap["words"].([]string))
  heap["words"] = nil
  heap["stop_words"] = nil
}

func frequencies() {
  heap["word_freqs"] = make(map[string]int)

  for len(stack) > 0 {
  	stack.Push(heap["word_freqs"].(map[string]int)[stack.Peek().(string)])
  	stack.Push(1)
  	stack.Push(stack.Pop().(int) + stack.Pop().(int))
  	heap["freq"] = stack.Pop()
  	heap["word_freqs"].(map[string]int)[stack.Pop().(string)] = heap["freq"].(int)
  }

  stack.Push(heap["word_freqs"])
  heap["word_freqs"] = nil
}

func sort() {
  heap["sorted"] = make([]Pair, len(stack.Peek().(map[string]int)))

  for heap["word"], heap["freq"] = range stack.Pop().(map[string]int) {
  	stack.Push(heap["sorted"])

  	for heap["index"], heap["pair"] = range stack.Peek().([]Pair) {
  		stack.Push(heap["pair"].(Pair).Freq)
  		stack.Push(heap["freq"])

  		if stack.Pop().(int) > stack.Pop().(int) {
  			heap["sorted"] = slices.Insert(
  				stack.Pop().([]Pair),
  				heap["index"].(int),
  				Pair{Word: heap["word"].(string), Freq: heap["freq"].(int)})

  			break
  		}
  	}
  }
  stack.Push(heap["sorted"].([]Pair)[:25])
}

Arrays

Constraints

  • Main data type: arrays - a fixed-size collection of elements.
  • No explicit iteration; instead, an array is accessed by high-level, declarative operations.
  • Computation unfolds as search, selection, and transformation of fixed-size data.

Exercises in Programming Style

Commentary

The shift from strictly imperative programming to a more declarative style makes a huge difference. Again, the style doesn’t fit well with Go which is intended for imperative programming, so most of this code is setting up the functions needed to write the main function according to the style. But the main function showcases an analogy between contrasting programming styles and grammar, where a declarative style is like simple but meaningful sentences vs the compound sentences (holding the same meaning) of an imperative style. This is not to say that this makes imperative programming worse, but that it’s an option to choose in cases where you want to spell out a program more verbosely or allow others to do so, which may be most suitable when you’d like to get developers working within a system quickly without being encumbered by the additional background and education needed to express things more concisely.

Code

Arrays
package main

import (
	"fmt"
	"os"
	"regexp"
	"slices"
	"strings"
)

var words, stopWords Array

type Array []any
type Freqs []Pair
type Pair struct {
	Word string
	Freq int
}

func main() {
	WordsFromFile(os.Args[1]).
		Lower().
		Length(2).
		NotIn(WordsFromFile("stop_words.txt")).
		Freqs().
		SortFreqs().
		PrintFreqs(25)
}

func open(name string) string {
	f, _ := os.ReadFile(name)
	return string(f)
}

func WordsFromFile(name string) (rtn Array) {
	re := regexp.MustCompile(`[\W_]+`)
	s := strings.Trim(re.ReplaceAllString(open(name), " "), " ")

	spl := strings.Split(s, " ")
	rtn = make(Array, len(spl))

	for i, v := range spl {
		rtn[i] = v
	}
	return
}

func (a Array) Transform(t func(e any) any) Array {
	for i, v := range a {
		a[i] = t(v)
	}
	return a
}

func (a Array) Where(p func(e any) bool) (rtn Array) {
	rtn = make(Array, 0)
	for _, v := range a {
		if p(v) {
			rtn = append(rtn, v)
		}
	}
	return
}

func (a Array) Freqs() (rtn Freqs) {
	rtn = make(Freqs, 0)
	freqs := make(map[string]int)

	for _, v := range a {
		freqs[v.(string)]++
	}

	for k, v := range freqs {
		rtn = append(rtn, Pair{Word: k, Freq: v})
	}
	return
}

func (a Freqs) SortFreqs() Freqs {
	slices.SortFunc(a, func(x, y Pair) int {
		if x.Freq < y.Freq {
			return -1
		} else if x.Freq > y.Freq {
			return 1
		} else {
			return 0
		}
	})
	slices.Reverse(a)

	return a
}

func (a Freqs) PrintFreqs(i int) Freqs {
	for _, v := range a[:i] {
		fmt.Printf("%-10v %v\n", v.Word, v.Freq)
	}
	return a
}

func (a Array) Lower() Array {
	return a.Transform(func(e any) any { return strings.ToLower(e.(string)) })
}

func (a Array) NotIn(s []any) Array {
	return a.Where(func(e any) bool { return !slices.Contains(s, e) })
}

func (a Array) Length(l int) Array {
	return a.Where(func(e any) bool { return len(e.(string)) >= l })
}