Solution to Concurrent Crawling by Йордан Пулов

Results

  • 7 points from the tests
  • 0 bonus points
  • 7 points total
  • 8 passing test(s)
  • 3 failing test(s)

Code

package main

import (
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"time"
)

func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
	// wrong number of workers
	if workersCount <= 0 {
		return "", errors.New("Workers are less than 1")
	}

	var urlsQueue []string
	currentUrl := make(chan string)
	gotIt := make(chan string)
	closedChan := make(chan struct{})

	// buffering the channels; cannot do it without another goroutine
	go func() {
		for {
			if len(urlsQueue) > 0 {
				select {
				case currentUrl <- urlsQueue[0]: // reading from the buffer
					urlsQueue = urlsQueue[1:]
				case urls, ok := <-chunkedUrlsToCheck: // writing into the buffer
					if ok == false {
						closedChan <- struct{}{}
						return
					}
					for _, url := range urls {
						urlsQueue = append(urlsQueue, url)
					}
				}
			} else {
				// insertion of the first slice of URLs
				urls, ok := <-chunkedUrlsToCheck
				if ok == false {
					closedChan <- struct{}{}
					return
				}
				for _, url := range urls {
					urlsQueue = append(urlsQueue, url)
				}
			}
		}
	}()

	// spawn all workers at once... not good, I know :(
	for i := 0; i < workersCount; i++ {
		go func() {
			for {
				// get the next URL
				url := <-currentUrl
				resp, urlError := http.Get(url)

				// check for a request error or a non-2xx status code
				if urlError != nil || resp.StatusCode/100 != 2 {
					break
				}

				// read the HTML
				html, err := ioutil.ReadAll(resp.Body)
				resp.Body.Close()

				// problems reading the HTML
				if err != nil {
					break
				}

				// check with the callback
				if callback(fmt.Sprintf("%s", html)) {
					gotIt <- url
					return
				}
			}
		}()
	}

	// waiting for the result
	select {
	case url := <-gotIt:
		// when the parent dies... all children die as well
		return url, nil
	case <-time.After(15 * time.Second):
		return "", errors.New("Time expired")
	case <-closedChan:
		return "", errors.New("Closed Chan")
	}
}
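
For illustration only (this is not part of the submitted solution): a minimal sketch of how SeekAndDestroy might be driven, assuming it is compiled together with the code above in the same main package. The file name, the example.com/example.org URLs, and the search string are all hypothetical.

// usage_sketch.go - illustrative only; assumes the solution above is in the same package.
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical input: one chunk of URLs sent from a separate goroutine.
	// The channel is left open, so the crawler keeps waiting for more chunks
	// until a match is found or the 15-second timeout fires.
	chunks := make(chan []string)
	go func() {
		chunks <- []string{"https://example.com", "https://example.org"}
	}()

	// The callback reports the first page whose body contains "Example Domain".
	url, err := SeekAndDestroy(func(body string) bool {
		return strings.Contains(body, "Example Domain")
	}, chunks, 3)

	if err != nil {
		fmt.Println("no match:", err)
		return
	}
	fmt.Println("found at:", url)
}

The sketch leaves chunks open on purpose: with this implementation, closing the channel while URLs are still queued can trigger the closedChan case and return the "Closed Chan" error before every queued URL has been visited.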

Execution log

History (1 version and 1 comment)

Йордан updated the solution on 10.12.2014 22:48 (more than 3 years ago)
