Решение на Concurrent Crawling от Мартин Добрев

Обратно към всички решения

Към профила на Мартин Добрев

Резултати

  • 5 точки от тестове
  • 0 бонус точки
  • 5 точки общо
  • 5 успешни тест(а)
  • 6 неуспешни тест(а)

Код

package main
import (
//"fmt"
"errors"
"io/ioutil"
"net/http"
"sync"
"time"
)
type queue struct {
conteiner []string
// pushEvent chan interface{}
empty bool
mutex sync.Mutex
}
func newQueue() *queue {
res := new(queue)
res.conteiner = make([]string, 0)
return res
}
func (q *queue) Empty() bool {
return q.empty
}
func (q *queue) Push(s string) {
q.mutex.Lock()
defer q.mutex.Unlock()
q.conteiner = append(q.conteiner, s)
q.empty = true
}
func (q *queue) Pop() string {
q.mutex.Lock()
defer q.mutex.Unlock()
res := q.conteiner[0]
q.conteiner = append(q.conteiner[:0], q.conteiner[1:]...)
if len(q.conteiner) == 0 {
q.empty = false
}
return res
}
type manager struct {
limit int
current int
release chan interface{}
mutex sync.Mutex
}
func newManager(limit int) *manager {
res := new(manager)
res.limit = limit
res.release = make(chan interface{})
res.current = 0
go func(m *manager) {
for {
_, ok := <-m.release
if !ok {
return
}
m.current--
time.Sleep(2 * time.Microsecond)
}
}(res)
return res
}
func (m *manager) AddTask(callback func(string) bool, url string, output chan<- string) {
m.mutex.Lock()
defer m.mutex.Unlock()
for {
if m.current < m.limit {
m.current++
go worker(callback, url, output, m)
return
}
time.Sleep(2 * time.Microsecond)
}
}
func worker(callback func(string) bool, url string, output chan<- string, m *manager) {
defer func(m *manager) {
m.release <- struct{}{}
}(m)
client := http.Client{
Timeout: time.Duration(3 * time.Second),
}
response, err := client.Get(url)
if err != nil {
return
} else {
defer response.Body.Close()
contents, err := ioutil.ReadAll(response.Body)
if err != nil {
return
}
con := string(contents[:])
if callback(con) {
output <- url
}
}
}
func bufferedUrls(buffer *queue, chunkedUrlsToCheck <-chan []string) {
for {
select {
case urls, ok := <-chunkedUrlsToCheck:
if !ok {
return
}
for _, val := range urls {
buffer.Push(val)
}
}
}
}
func helpSaD(callback func(string) bool, buffer *queue, workersCount int, output chan<- string) {
m := newManager(workersCount)
for {
if buffer.Empty() {
m.AddTask(callback, buffer.Pop(), output)
}
time.Sleep(2 * time.Microsecond)
}
}
func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
if workersCount < 0 {
return "", errors.New("worker count is incorect")
}
if chunkedUrlsToCheck == nil {
return "", errors.New("cunkedUrlsToCheck must be initialised")
}
buffer := newQueue()
go bufferedUrls(buffer, chunkedUrlsToCheck)
output := make(chan string)
go helpSaD(callback, buffer, workersCount, output)
select {
case res := <-output:
return res, nil
case <-time.After(15 * time.Second):
return "", errors.New("probably a timeout")
}
}

Лог от изпълнението

[/tmp/go-build630275979/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestWithNegativeWorkersCount -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-mg79qk	0.013s
[/tmp/go-build871531752/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestWithZeroWorkersCount -test.timeout=120s]
--- FAIL: TestWithZeroWorkersCount-2 (1.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: parameter errors should be immediately returned (workersCount is zero)
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-mg79qk	1.006s
[/tmp/go-build055362542/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestWithInvalidCallback -test.timeout=120s]
--- FAIL: TestWithInvalidCallback-2 (1.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: parameter errors should be immediately returned (callback is nil)
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-mg79qk	1.005s
[/tmp/go-build318625609/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestWithNilChannel -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-mg79qk	0.008s
[/tmp/go-build036123954/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestWithClosedChannelWhenStarting -test.timeout=120s]
--- FAIL: TestWithClosedChannelWhenStarting-2 (1.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: parameter errors should be immediately returned (the urls channel was closed)
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-mg79qk	1.007s
[/tmp/go-build913339945/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestWithClosedChannelMidway -test.timeout=120s]
--- FAIL: TestWithClosedChannelMidway-2 (7.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 7 seconds: the urls channel was closed after 5 seconds
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-mg79qk	7.005s
[/tmp/go-build520493694/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestWhetherGlobalTimeoutIsHandled -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-mg79qk	15.005s
[/tmp/go-build633307270/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestWithLoremIpsum -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-mg79qk	2.007s
[/tmp/go-build697257015/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestIfTimeoutAndErrorCodesAreHonoured -test.timeout=120s]
--- FAIL: TestIfTimeoutAndErrorCodesAreHonoured-2 (0.00 seconds)
	solution_test.go:267: Function returned 'http://127.0.0.2:41631/page_with_error_code' when it should have returned 'http://127.0.0.2:41631/correct_page'
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-mg79qk	0.006s
[/tmp/go-build085508931/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestRaceCondition -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-mg79qk	1.010s
[/tmp/go-build661457706/_/tmp/d20150111-16649-mg79qk/_test/d20150111-16649-mg79qk.test -test.run=TestCloseChannelBeforeFinish -test.timeout=120s]
--- FAIL: TestCloseChannelBeforeFinish-2 (2.00 seconds)
	solution_test.go:316: Function should have returned an error, channel was closed before it could finish
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-mg79qk	2.009s

История (3 версии и 1 коментар)

Мартин обнови решението на 09.12.2014 17:57 (преди над 3 години)

+package main
+
+import (
+ //"fmt"
+ "errors"
+ "io/ioutil"
+ "net/http"
+ "sync"
+ "time"
+)
+
+type queue struct {
+ conteiner []string
+ // pushEvent chan interface{}
+ empty bool
+ mutex sync.Mutex
+}
+
+func newQueue() *queue {
+ res := new(queue)
+ res.conteiner = make([]string, 0)
+ return res
+}
+
+func (q *queue) Empty() bool {
+ return q.empty
+}
+
+func (q *queue) Push(s string) {
+ q.mutex.Lock()
+ defer q.mutex.Unlock()
+ q.conteiner = append(q.conteiner, s)
+ q.empty = true
+}
+
+func (q *queue) Pop() string {
+ q.mutex.Lock()
+ defer q.mutex.Unlock()
+ res := q.conteiner[0]
+ q.conteiner = append(q.conteiner[:0], q.conteiner[1:]...)
+ if len(q.conteiner) == 0 {
+ q.empty = false
+ }
+ return res
+}
+
+type manager struct {
+ limit int
+ current int
+ release chan interface{}
+ mutex sync.Mutex
+}
+
+func newManager(limit int) *manager {
+ res := new(manager)
+ res.limit = limit
+ res.release = make(chan interface{})
+ res.current = 0
+
+ go func(m *manager) {
+ for {
+ _, ok := <-m.release
+ if !ok {
+ return
+ }
+ m.current--
+ time.Sleep(2 * time.Microsecond)
+ }
+ }(res)
+
+ return res
+}
+
+func (m *manager) AddTask(callback func(string) bool, url string, output chan<- string) {
+ m.mutex.Lock()
+ defer m.mutex.Unlock()
+ for {
+ if m.current < m.limit {
+ m.current++
+ go worker(callback, url, output, m)
+ return
+ }
+ time.Sleep(2 * time.Microsecond)
+ }
+}
+
+func worker(callback func(string) bool, url string, output chan<- string, m *manager) {
+
+ defer func(m *manager) {
+ m.release <- struct{}{}
+ }(m)
+ response, err := http.Get(url)
+ if err != nil {
+ return
+ } else {
+ defer response.Body.Close()
+ contents, err := ioutil.ReadAll(response.Body)
+ if err != nil {
+ return
+ }
+ con := string(contents[:])
+ if callback(con) {
+ output <- url
+ }
+ }
+}
+
+func bufferedUrls(buffer *queue, chunkedUrlsToCheck <-chan []string) {
+ for {
+ select {
+ case urls, ok := <-chunkedUrlsToCheck:
+ if !ok {
+ return
+ }
+ for _, val := range urls {
+ buffer.Push(val)
+ }
+ }
+ }
+}
+
+func helpSaD(callback func(string) bool, buffer *queue, workersCount int, output chan<- string) {
+ m := newManager(workersCount)
+ for {
+
+ if buffer.Empty() {
+ m.AddTask(callback, buffer.Pop(), output)
+ }
+ time.Sleep(2 * time.Microsecond)
+ }
+}
+
+func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
+ buffer := newQueue()
+ go bufferedUrls(buffer, chunkedUrlsToCheck)
+ output := make(chan string)
+ go helpSaD(callback, buffer, workersCount, output)
+ select {
+ case res := <-output:
+ return res, nil
+ case <-time.After(15 * time.Second):
+ return "", errors.New("probably a timeout")
+ }
+}

Мартин обнови решението на 09.12.2014 18:05 (преди над 3 години)

package main
import (
//"fmt"
"errors"
"io/ioutil"
"net/http"
"sync"
"time"
)
type queue struct {
conteiner []string
// pushEvent chan interface{}
empty bool
mutex sync.Mutex
}
func newQueue() *queue {
res := new(queue)
res.conteiner = make([]string, 0)
return res
}
func (q *queue) Empty() bool {
return q.empty
}
func (q *queue) Push(s string) {
q.mutex.Lock()
defer q.mutex.Unlock()
q.conteiner = append(q.conteiner, s)
q.empty = true
}
func (q *queue) Pop() string {
q.mutex.Lock()
defer q.mutex.Unlock()
res := q.conteiner[0]
q.conteiner = append(q.conteiner[:0], q.conteiner[1:]...)
if len(q.conteiner) == 0 {
q.empty = false
}
return res
}
type manager struct {
limit int
current int
release chan interface{}
mutex sync.Mutex
}
func newManager(limit int) *manager {
res := new(manager)
res.limit = limit
res.release = make(chan interface{})
res.current = 0
go func(m *manager) {
for {
_, ok := <-m.release
if !ok {
return
}
m.current--
time.Sleep(2 * time.Microsecond)
}
}(res)
return res
}
func (m *manager) AddTask(callback func(string) bool, url string, output chan<- string) {
m.mutex.Lock()
defer m.mutex.Unlock()
for {
if m.current < m.limit {
m.current++
go worker(callback, url, output, m)
return
}
time.Sleep(2 * time.Microsecond)
}
}
func worker(callback func(string) bool, url string, output chan<- string, m *manager) {
defer func(m *manager) {
m.release <- struct{}{}
}(m)
- response, err := http.Get(url)
+
+ client := http.Client{
+ Timeout: time.Duration(3 * time.Second),
+ }
+ response, err := client.Get(url)
if err != nil {
return
} else {
defer response.Body.Close()
contents, err := ioutil.ReadAll(response.Body)
if err != nil {
return
}
con := string(contents[:])
if callback(con) {
output <- url
}
}
}
func bufferedUrls(buffer *queue, chunkedUrlsToCheck <-chan []string) {
for {
select {
case urls, ok := <-chunkedUrlsToCheck:
if !ok {
return
}
for _, val := range urls {
buffer.Push(val)
}
}
}
}
func helpSaD(callback func(string) bool, buffer *queue, workersCount int, output chan<- string) {
m := newManager(workersCount)
for {
if buffer.Empty() {
m.AddTask(callback, buffer.Pop(), output)
}
time.Sleep(2 * time.Microsecond)
}
}
func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
buffer := newQueue()
go bufferedUrls(buffer, chunkedUrlsToCheck)
output := make(chan string)
go helpSaD(callback, buffer, workersCount, output)
select {
case res := <-output:
return res, nil
case <-time.After(15 * time.Second):
return "", errors.New("probably a timeout")
}
-}
+}

Помисли дали има начин да избегнеш безкрайни цикли с time.Sleep(2 * time.Microsecond), много рядко това е най-доброто решение :). Обикновено с канали и select може да се направи по-добра провека отколкото с poll-ване на някакъв интервал.

Също така, прочети условието отново - трябва да провериш дали параметрите, които функцията получава, са валидни.

Мартин обнови решението на 10.12.2014 23:19 (преди над 3 години)

package main
import (
//"fmt"
"errors"
"io/ioutil"
"net/http"
"sync"
"time"
)
type queue struct {
conteiner []string
// pushEvent chan interface{}
empty bool
mutex sync.Mutex
}
func newQueue() *queue {
res := new(queue)
res.conteiner = make([]string, 0)
return res
}
func (q *queue) Empty() bool {
return q.empty
}
func (q *queue) Push(s string) {
q.mutex.Lock()
defer q.mutex.Unlock()
q.conteiner = append(q.conteiner, s)
q.empty = true
}
func (q *queue) Pop() string {
q.mutex.Lock()
defer q.mutex.Unlock()
res := q.conteiner[0]
q.conteiner = append(q.conteiner[:0], q.conteiner[1:]...)
if len(q.conteiner) == 0 {
q.empty = false
}
return res
}
type manager struct {
limit int
current int
release chan interface{}
mutex sync.Mutex
}
func newManager(limit int) *manager {
res := new(manager)
res.limit = limit
res.release = make(chan interface{})
res.current = 0
go func(m *manager) {
for {
_, ok := <-m.release
if !ok {
return
}
m.current--
time.Sleep(2 * time.Microsecond)
}
}(res)
return res
}
func (m *manager) AddTask(callback func(string) bool, url string, output chan<- string) {
m.mutex.Lock()
defer m.mutex.Unlock()
for {
if m.current < m.limit {
m.current++
go worker(callback, url, output, m)
return
}
time.Sleep(2 * time.Microsecond)
}
}
func worker(callback func(string) bool, url string, output chan<- string, m *manager) {
defer func(m *manager) {
m.release <- struct{}{}
}(m)
client := http.Client{
Timeout: time.Duration(3 * time.Second),
}
response, err := client.Get(url)
if err != nil {
return
} else {
defer response.Body.Close()
contents, err := ioutil.ReadAll(response.Body)
if err != nil {
return
}
con := string(contents[:])
if callback(con) {
output <- url
}
}
}
func bufferedUrls(buffer *queue, chunkedUrlsToCheck <-chan []string) {
for {
select {
case urls, ok := <-chunkedUrlsToCheck:
if !ok {
return
}
for _, val := range urls {
buffer.Push(val)
}
}
}
}
func helpSaD(callback func(string) bool, buffer *queue, workersCount int, output chan<- string) {
m := newManager(workersCount)
for {
if buffer.Empty() {
m.AddTask(callback, buffer.Pop(), output)
}
time.Sleep(2 * time.Microsecond)
}
}
func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
+ if workersCount < 0 {
+ return "", errors.New("worker count is incorect")
+ }
+
+ if chunkedUrlsToCheck == nil {
+ return "", errors.New("cunkedUrlsToCheck must be initialised")
+ }
+
buffer := newQueue()
go bufferedUrls(buffer, chunkedUrlsToCheck)
output := make(chan string)
go helpSaD(callback, buffer, workersCount, output)
select {
case res := <-output:
return res, nil
case <-time.After(15 * time.Second):
return "", errors.New("probably a timeout")
}
}