Golang membaca file besar

1package main
  2
  3import (
  4	"bufio"
  5	"fmt"
  6	"log"
  7	"os"
  8	"strconv"
  9	"strings"
 10	"sync"
 11	"sync/atomic"
 12	"time"
 13)
 14
 15func main() {
 16	start := time.Now()
 17	file, err := os.Open(os.Args[1])
 18	if err != nil {
 19		log.Fatal(err)
 20	}
 21	defer file.Close()
 22
 23	commonName := ""
 24	commonCount := 0
 25	scanner := bufio.NewScanner(file)
 26	nameMap := make(map[string]int)
 27	dateMap := make(map[int]int)
 28
 29	namesCounted := false
 30	namesCount := 0
 31	fileLineCount := int64(0)
 32
 33	type entry struct {
 34		firstName string
 35		name      string
 36		date      int
 37	}
 38
 39	linesChunkLen := 64 * 1024
 40	linesChunkPoolAllocated := int64(0)
 41	linesPool := sync.Pool{New: func() interface{} {
 42		lines := make([]string, 0, linesChunkLen)
 43		atomic.AddInt64(&linesChunkPoolAllocated, 1)
 44		return lines
 45	}}
 46	lines := linesPool.Get().([]string)[:0]
 47
 48	entriesPoolAllocated := int64(0)
 49	entriesPool := sync.Pool{New: func() interface{} {
 50		entries := make([]entry, 0, linesChunkLen)
 51		atomic.AddInt64(&entriesPoolAllocated, 1)
 52		return entries
 53	}}
 54	mutex := &sync.Mutex{}
 55	wg := sync.WaitGroup{}
 56
 57	scanner.Scan()
 58	for {
 59		lines = append(lines, scanner.Text())
 60		willScan := scanner.Scan()
 61		if len(lines) == linesChunkLen || !willScan {
 62			linesToProcess := lines
 63			wg.Add(len(linesToProcess))
 64			go func() {
 65				atomic.AddInt64(&fileLineCount, int64(len(linesToProcess)))
 66				entries := entriesPool.Get().([]entry)[:0]
 67				for _, text := range linesToProcess {
 68					// get all the names
 69					entry := entry{}
 70					split := strings.SplitN(text, "|", 9)
 71					entry.name = strings.TrimSpace(split[7])
 72
 73					// extract first names
 74					if entry.name != "" {
 75						startOfName := strings.Index(entry.name, ", ") + 2
 76						if endOfName := strings.Index(entry.name[startOfName:], " "); endOfName < 0 {
 77							entry.firstName = entry.name[startOfName:]
 78						} else {
 79							entry.firstName = entry.name[startOfName : startOfName+endOfName]
 80						}
 81						if cs := strings.Index(entry.firstName, ","); cs > 0 {
 82							entry.firstName = entry.firstName[:cs]
 83						}
 84					}
 85					// extract dates
 86					entry.date, _ = strconv.Atoi(split[4][:6])
 87					entries = append(entries, entry)
 88				}
 89				linesPool.Put(linesToProcess)
 90				mutex.Lock()
 91				for _, entry := range entries {
 92					if len(entry.firstName) != 0 {
 93						nameCount := nameMap[entry.firstName] + 1
 94						nameMap[entry.firstName] = nameCount
 95						if nameCount > commonCount {
 96							commonCount = nameCount
 97							commonName = entry.firstName
 98						}
 99					}
100					if namesCounted == false {
101						if namesCount == 0 {
102							fmt.Printf("Name: %s at index: %v\n", entry.name, 0)
103						} else if namesCount == 432 {
104							fmt.Printf("Name: %s at index: %v\n", entry.name, 432)
105						} else if namesCount == 43243 {
106							fmt.Printf("Name: %s at index: %v\n", entry.name, 43243)
107							namesCounted = true
108						}
109						namesCount++
110					}
111					dateMap[entry.date]++
112				}
113				mutex.Unlock()
114				entriesPool.Put(entries)
115				wg.Add(-len(entries))
116			}()
117			lines = linesPool.Get().([]string)[:0]
118		}
119		if !willScan {
120			break
121		}
122	}
123	wg.Wait()
124
125	// report c2: names at index
126	fmt.Printf("Name time: %v\n", time.Since(start))
127
128	// report c1: total number of lines
129	fmt.Printf("Total file line count: %v\n", fileLineCount)
130	fmt.Printf("Line count time: %v\n", time.Since(start))
131
132	// report c3: donation frequency
133	for k, v := range dateMap {
134		fmt.Printf("Donations per month and year: %v and donation ncount: %v\n", k, v)
135	}
136	fmt.Printf("Donations time: %v\n", time.Since(start))
137
138	// report c4: most common firstName
139	fmt.Printf("The most common first name is: %s and it occurs: %v times.\n", commonName, commonCount)
140	fmt.Printf("Most common name time: %v\n", time.Since(start))
141}
Tired Tamarin