引用:
作者: netsonic
Traceback (most recent call last):
File "F:\code.py", line 6, in <module>
for line in f:
UnicodeDecodeError: 'gbk' codec can't decode byte 0xbf in positio...
|
代码:
package main
import (
"bufio"
"fmt"
"log"
"os"
"strings"
)
// main is the entry point of the extracturl command.
//
// Usage: extracturl <file> [prefix]
//
// It reads <file> line by line and prints, once each and in order of first
// appearance, every substring that starts with prefix and runs up to (but not
// including) the next double quote. When no closing quote follows on the same
// line, the match extends to the end of that line. prefix defaults to
// "https://v.qq.com/x/page/".
//
// A missing file path or an empty prefix prints a short message and returns;
// failures to open or read the file are reported through log.Fatal.
func main() {
	var filePath string
	pattern := "https://v.qq.com/x/page/"
	if len(os.Args) > 1 {
		filePath = os.Args[1]
	}
	if len(os.Args) > 2 {
		pattern = os.Args[2]
	}
	if len(filePath) == 0 {
		fmt.Println("No file path specified")
		return
	}
	// An empty prefix would match at every position and never advance the
	// per-line search below, so reject it up front.
	if pattern == "" {
		fmt.Println("No URL prefix specified")
		return
	}

	fFile, err := os.Open(filePath)
	if err != nil {
		log.Fatal(err)
	}
	defer fFile.Close()

	scanner := bufio.NewScanner(fFile)
	// Saved web pages frequently contain very long (minified) lines; raise the
	// default 64 KiB token limit so they do not abort the scan.
	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)

	urls := map[string]struct{}{}
	for scanner.Scan() {
		rest := scanner.Text()
		for {
			// strings.Index reports -1 when absent; 0 is a valid match at
			// the very start of the line.
			pos := strings.Index(rest, pattern)
			if pos < 0 {
				break
			}
			rest = rest[pos:]

			// Look for the terminating quote after the prefix itself so the
			// search always moves forward by at least len(pattern).
			end := strings.Index(rest[len(pattern):], "\"")
			if end < 0 {
				end = len(rest)
			} else {
				end += len(pattern)
			}
			url := rest[:end]
			rest = rest[end:]

			if _, ok := urls[url]; !ok {
				fmt.Println(url)
				urls[url] = struct{}{}
			}
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
用法: extracturl <文件名> [前缀] (前缀可选,默认为 https://v.qq.com/x/page/)