Solution to Concurrent Crawling by Александър Деспотов


Results

  • 7 points from tests
  • 0 bonus points
  • 7 points total
  • 8 passed test(s)
  • 3 failed test(s)

Code

package main

import (
	"errors"
	"io/ioutil"
	"net/http"
	"time"
)

func splitUrls(crnUrls []string, urlsQ chan string) {
	for _, val := range crnUrls {
		urlsQ <- val
	}
}

func readUrl(url string, callback func(string) bool, result chan string, freeWorker chan interface{}) {
	client := http.Client{Timeout: time.Duration(3 * time.Second)}
	response, err := client.Get(url)
	if err != nil {
		freeWorker <- struct{}{}
	} else {
		defer response.Body.Close()
		contents, _ := ioutil.ReadAll(response.Body)
		if callback(string(contents)) {
			result <- url
		}
	}
}

func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
	if chunkedUrlsToCheck == nil || workersCount < 0 {
		return "", errors.New("Error!")
	}
	urlsQ := make(chan string, 100)
	result := make(chan string)
	freeWorker := make(chan interface{})
	defer close(urlsQ)
	defer close(result)
	defer close(freeWorker)
	for {
		select {
		case lastRead, ok := <-chunkedUrlsToCheck:
			if !ok {
				return "", errors.New("Erorr!")
			}
			splitUrls(lastRead, urlsQ)
		case res := <-result:
			return res, nil
		case newTask := <-urlsQ:
			workersCount--
			go readUrl(newTask, callback, result, freeWorker)
		case <-freeWorker:
			workersCount++
		case <-time.After(time.Second * 15):
			return "", errors.New("Timeout!")
		}
	}
}

Execution log

[/tmp/go-build667833021/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestWithNegativeWorkersCount -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1gskzgy	0.005s
[/tmp/go-build338529682/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestWithZeroWorkersCount -test.timeout=120s]
--- FAIL: TestWithZeroWorkersCount-2 (1.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: parameter errors should be immediately returned (workersCount is zero)
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-1gskzgy	1.005s
[/tmp/go-build631691133/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestWithInvalidCallback -test.timeout=120s]
--- FAIL: TestWithInvalidCallback-2 (1.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: parameter errors should be immediately returned (callback is nil)
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-1gskzgy	1.005s
[/tmp/go-build420453648/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestWithNilChannel -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1gskzgy	0.007s
[/tmp/go-build709669538/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestWithClosedChannelWhenStarting -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1gskzgy	0.010s
[/tmp/go-build324300992/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestWithClosedChannelMidway -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1gskzgy	5.005s
[/tmp/go-build023551871/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestWhetherGlobalTimeoutIsHandled -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1gskzgy	15.008s
[/tmp/go-build405011957/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestWithLoremIpsum -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1gskzgy	2.008s
[/tmp/go-build583313603/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestIfTimeoutAndErrorCodesAreHonoured -test.timeout=120s]
--- FAIL: TestIfTimeoutAndErrorCodesAreHonoured-2 (0.00 seconds)
	solution_test.go:267: Function returned 'http://127.0.0.2:43472/page_with_error_code' when it should have returned 'http://127.0.0.2:43472/correct_page'
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-1gskzgy	0.006s
[/tmp/go-build898738108/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestRaceCondition -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1gskzgy	1.009s
[/tmp/go-build634411646/_/tmp/d20150111-16649-1gskzgy/_test/d20150111-16649-1gskzgy.test -test.run=TestCloseChannelBeforeFinish -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1gskzgy	1.006s
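
The three failures point at two gaps. TestWithZeroWorkersCount and TestWithInvalidCallback expect parameter errors to be returned immediately, but the solution only rejects a nil channel and a negative workersCount; TestIfTimeoutAndErrorCodesAreHonoured expects pages answering with an error status code to be skipped, while readUrl feeds whatever body it receives to the callback. A minimal sketch of the missing checks, inferred from the log messages above (the task statement itself is not reproduced here, and the error texts are made up):

package main

import (
	"errors"
	"fmt"
	"net/http"
)

// validateArgs sketches the fail-fast checks the failing tests ask for.
// Only the conditions come from the test log; everything else is assumed.
func validateArgs(callback func(string) bool, urls <-chan []string, workers int) error {
	if callback == nil { // TestWithInvalidCallback
		return errors.New("callback is nil")
	}
	if urls == nil { // the solution already handles this (TestWithNilChannel passes)
		return errors.New("urls channel is nil")
	}
	if workers <= 0 { // TestWithZeroWorkersCount: zero must be rejected too
		return errors.New("workersCount must be positive")
	}
	return nil
}

// usableResponse sketches the check behind TestIfTimeoutAndErrorCodesAreHonoured:
// a page that responds with an error code must never reach the callback.
func usableResponse(resp *http.Response) bool {
	return resp.StatusCode >= 200 && resp.StatusCode < 300
}

func main() {
	fmt.Println(validateArgs(nil, nil, 0)) // rejected immediately, as the tests require
}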

History (4 versions and 6 comments)

Александър updated the solution on 09.12.2014 at 23:27 (over 3 years ago)

+package main
+
+import (
+	"errors"
+	"io/ioutil"
+	"net/http"
+	"time"
+)
+
+func splitUrls(crnUrls []string, urlsQ chan string) {
+	for _, val := range crnUrls {
+		urlsQ <- val
+	}
+}
+
+func readUrl(url string, callback func(string) bool, result chan string, freeWorker chan interface{}) {
+	client := http.Client{Timeout: time.Duration(3 * time.Second)}
+	response, err := client.Get(url)
+	if err != nil {
+		freeWorker <- struct{}{}
+	} else {
+		defer response.Body.Close()
+		contents, _ := ioutil.ReadAll(response.Body)
+		if callback(string(contents)) {
+			result <- url
+		}
+	}
+}
+
+func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
+	lastRead, ok := <-chunkedUrlsToCheck
+	if !ok || workersCount < 0 {
+		return "", errors.New("Error!")
+	}
+
+	urlsQ := make(chan string, 100)
+	result := make(chan string)
+	freeWorker := make(chan interface{})
+	splitUrls(lastRead, urlsQ)
+
+	for {
+		select {
+		case lastRead := <-chunkedUrlsToCheck:
+			splitUrls(lastRead, urlsQ)
+		case res := <-result:
+			return res, nil
+		case newTask := <-urlsQ:
+			workersCount--
+			go readUrl(newTask, callback, result, freeWorker)
+		case <-freeWorker:
+			workersCount++
+		case <-time.After(time.Second * 15):
+			return "", errors.New("Timeout!")
+		}
+	}
+
+	return lastRead[0], nil
+}

Александър updated the solution on 09.12.2014 at 23:29 (over 3 years ago)

package main

import (
	"errors"
	"io/ioutil"
	"net/http"
	"time"
)

func splitUrls(crnUrls []string, urlsQ chan string) {
	for _, val := range crnUrls {
		urlsQ <- val
	}
}

func readUrl(url string, callback func(string) bool, result chan string, freeWorker chan interface{}) {
	client := http.Client{Timeout: time.Duration(3 * time.Second)}
	response, err := client.Get(url)
	if err != nil {
		freeWorker <- struct{}{}
	} else {
		defer response.Body.Close()
		contents, _ := ioutil.ReadAll(response.Body)
		if callback(string(contents)) {
			result <- url
		}
	}
}

func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
	lastRead, ok := <-chunkedUrlsToCheck
	if !ok || workersCount < 0 {
		return "", errors.New("Error!")
	}
	urlsQ := make(chan string, 100)
	result := make(chan string)
	freeWorker := make(chan interface{})
+	defer close(urlsQ)
+	defer close(result)
+	defer close(freeWorker)
+
	splitUrls(lastRead, urlsQ)
	for {
		select {
		case lastRead := <-chunkedUrlsToCheck:
			splitUrls(lastRead, urlsQ)
		case res := <-result:
			return res, nil
		case newTask := <-urlsQ:
			workersCount--
			go readUrl(newTask, callback, result, freeWorker)
		case <-freeWorker:
			workersCount++
		case <-time.After(time.Second * 15):
			return "", errors.New("Timeout!")
		}
	}
	return lastRead[0], nil
}

Take a careful look at whether you are validating the input parameters correctly.

Also, what happens if someone passes you a valid, open chunkedUrlsToCheck channel on which a value is never sent? After 15 seconds you are supposed to return a timeout, but will that actually happen in your code?

And how do you guarantee that once you have found a valid result you will return it immediately, rather than keep on "digging"? :)
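
The second question points at the blocking receive lastRead, ok := <-chunkedUrlsToCheck executed before the select loop: on a channel that never delivers, the 15-second case is never reached. A minimal sketch of the fix, with every receive inside a select that shares one global deadline; creating the timer once, before the loop, also avoids the subtler bug where time.After written inside the select restarts the countdown on every iteration:

package main

import (
	"errors"
	"fmt"
	"time"
)

// seekSketch keeps every receive from the input channel inside a select that
// also watches a single global deadline, so a channel that never delivers a
// value produces a timeout instead of blocking forever.
func seekSketch(chunkedUrlsToCheck <-chan []string, limit time.Duration) (string, error) {
	deadline := time.After(limit) // created once: the countdown never restarts
	for {
		select {
		case chunk, ok := <-chunkedUrlsToCheck:
			if !ok {
				return "", errors.New("channel closed")
			}
			_ = chunk // hand the URLs off to workers here
		case <-deadline:
			return "", errors.New("Timeout!")
		}
	}
}

func main() {
	silent := make(chan []string) // open, but nothing is ever sent on it
	_, err := seekSketch(silent, 100*time.Millisecond)
	fmt.Println(err) // prints "Timeout!" instead of hanging
}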

Александър updated the solution on 10.12.2014 at 11:05 (over 3 years ago)

package main

import (
	"errors"
	"io/ioutil"
	"net/http"
	"time"
)

func splitUrls(crnUrls []string, urlsQ chan string) {
	for _, val := range crnUrls {
		urlsQ <- val
	}
}

func readUrl(url string, callback func(string) bool, result chan string, freeWorker chan interface{}) {
	client := http.Client{Timeout: time.Duration(3 * time.Second)}
	response, err := client.Get(url)
	if err != nil {
		freeWorker <- struct{}{}
	} else {
		defer response.Body.Close()
		contents, _ := ioutil.ReadAll(response.Body)
		if callback(string(contents)) {
			result <- url
		}
	}
}

func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
-	lastRead, ok := <-chunkedUrlsToCheck
-	if !ok || workersCount < 0 {
+	if chunkedUrlsToCheck == nil || workersCount < 0 {
		return "", errors.New("Error!")
	}
	urlsQ := make(chan string, 100)
	result := make(chan string)
	freeWorker := make(chan interface{})
	defer close(urlsQ)
	defer close(result)
	defer close(freeWorker)
-	splitUrls(lastRead, urlsQ)
-
	for {
		select {
		case lastRead := <-chunkedUrlsToCheck:
			splitUrls(lastRead, urlsQ)
		case res := <-result:
			return res, nil
		case newTask := <-urlsQ:
			workersCount--
			go readUrl(newTask, callback, result, freeWorker)
		case <-freeWorker:
			workersCount++
		case <-time.After(time.Second * 15):
			return "", errors.New("Timeout!")
		}
	}
-
-	return lastRead[0], nil
}

Yes, it is better now. The input validation still needs some work, but you have fixed the problem of waiting forever on the chunkedUrlsToCheck channel. Check the task statement for what has to happen when that channel is closed, though :)

The part of the statement saying "all remaining unchecked addresses must be ignored" is more or less mandatory :). The idea is that once you have found a valid address, you must not start new workers that go on checking further addresses from the channel. The bonus points are for aborting the currently running parallel HTTP requests themselves as soon as you find the right address.
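
Today the bonus behaviour maps naturally onto context cancellation: a single cancel call both stops idle workers from picking up new URLs and aborts their in-flight HTTP requests. Request contexts postdate this course (they arrived in Go 1.7), so this is a sketch of the idea rather than what the task expected; the coordinating goroutine would create the context with context.WithCancel and call cancel() the moment it receives on found:

package crawler

import (
	"context"
	"io"
	"net/http"
	"time"
)

// worker stops as soon as ctx is cancelled: it refuses to pick up new URLs,
// and the request built with NewRequestWithContext is aborted mid-flight too.
func worker(ctx context.Context, urls <-chan string, found chan<- string, callback func(string) bool) {
	client := &http.Client{Timeout: 3 * time.Second}
	for {
		select {
		case <-ctx.Done():
			return // the page was found elsewhere; ignore the remaining URLs
		case url, ok := <-urls:
			if !ok {
				return
			}
			req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
			if err != nil {
				continue
			}
			resp, err := client.Do(req)
			if err != nil {
				continue // includes cancellation while the request was running
			}
			body, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			if callback(string(body)) {
				select {
				case found <- url: // report the hit; the owner then calls cancel()
				case <-ctx.Done():
				}
			}
		}
	}
}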

Александър updated the solution on 10.12.2014 at 12:10 (over 3 years ago)

package main

import (
	"errors"
	"io/ioutil"
	"net/http"
	"time"
)

func splitUrls(crnUrls []string, urlsQ chan string) {
	for _, val := range crnUrls {
		urlsQ <- val
	}
}

func readUrl(url string, callback func(string) bool, result chan string, freeWorker chan interface{}) {
	client := http.Client{Timeout: time.Duration(3 * time.Second)}
	response, err := client.Get(url)
	if err != nil {
		freeWorker <- struct{}{}
	} else {
		defer response.Body.Close()
		contents, _ := ioutil.ReadAll(response.Body)
		if callback(string(contents)) {
			result <- url
		}
	}
}

func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
	if chunkedUrlsToCheck == nil || workersCount < 0 {
		return "", errors.New("Error!")
	}
	urlsQ := make(chan string, 100)
	result := make(chan string)
	freeWorker := make(chan interface{})
	defer close(urlsQ)
	defer close(result)
	defer close(freeWorker)
	for {
		select {
-		case lastRead := <-chunkedUrlsToCheck:
+		case lastRead, ok := <-chunkedUrlsToCheck:
+			if !ok {
+				return "", errors.New("Erorr!")
+			}
			splitUrls(lastRead, urlsQ)
		case res := <-result:
			return res, nil
		case newTask := <-urlsQ:
			workersCount--
			go readUrl(newTask, callback, result, freeWorker)
		case <-freeWorker:
			workersCount++
		case <-time.After(time.Second * 15):
			return "", errors.New("Timeout!")
		}
	}
}

Yes, but that could change the final result... To fix it I would have to do some ugly things (or at least I cannot think of a clean way to do it :) ). I will take the risk and submit it like this :) Thanks for the time you spent, cheers! :)
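
For the record, the feared ugliness can be avoided with the nil-channel idiom: receiving from a nil channel blocks forever, so a select case whose channel variable is set to nil is simply disabled. On close, the loop can stop accepting new chunks yet keep collecting answers from workers that are already running, and only report failure once they have all come back empty-handed. A sketch with hypothetical bookkeeping (busy counts launched workers; idle plays the role of the solution's freeWorker channel):

package crawler

import (
	"errors"
	"time"
)

// drainAfterClose keeps the loop alive after the input channel closes:
// setting input to nil disables that case, while results from workers that
// were already running are still honoured until every one of them drains.
func drainAfterClose(input <-chan []string, result <-chan string, idle <-chan struct{}, busy int) (string, error) {
	deadline := time.After(15 * time.Second)
	for {
		select {
		case chunk, ok := <-input:
			if !ok {
				input = nil // closed: this case can never fire again
				continue
			}
			_ = chunk // enqueue for the workers as before
		case res := <-result:
			return res, nil // a late worker may still deliver the right URL
		case <-idle:
			busy--
			if input == nil && busy == 0 {
				return "", errors.New("channel closed and no page matched")
			}
		case <-deadline:
			return "", errors.New("Timeout!")
		}
	}
}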