Added fetchers concept: separate scripts to fetch the feeds
Fetchers claim to be a certain client. They try to send the same headers as the original client. That's better than a simple curl request with a fake user agent, because curl doesn't send the other headers like the original client and therefore its traffic stands out.
This commit is contained in:
والد
8a80aa0d6d
کامیت
3a723b9440
10
fetchers/chrome
Executable file
10
fetchers/chrome
Executable file
@ -0,0 +1,10 @@
|
||||
#!/bin/sh
# Tries more or less to look like Chrome: fetches a URL with curl while
# sending a Chrome-like set of request headers.
#
# Usage: chrome url output
#   url    - address to fetch
#   output - file the response body is written to
# Environment:
#   RANDRSS_ROOT - directory that contains fetchers/chrome_agents
# Exit status:
#   1 on wrong usage or when no user agent could be picked,
#   otherwise the exit status of curl.
if [ $# -ne 2 ] ; then
	echo "usage: $0 url output" 1>&2
	exit 1
fi

agentsfile="$RANDRSS_ROOT/fetchers/chrome_agents"

# TODO: better randomize
# Pick one random line of the agent list; an unreadable list leaves the
# variable empty and is reported below.
useragent=$(shuf -n 1 "$agentsfile") || useragent=""

# List entries may already carry the header name ("User-Agent: ...").
# Strip it so the header is not sent as "User-Agent: User-Agent: ...".
useragent=${useragent#User-Agent: }

# Without an agent curl would drop the User-Agent header entirely, which
# makes the request stand out; refuse to fetch instead.
if [ -z "$useragent" ] ; then
	echo "$0: could not pick a user agent from $agentsfile" 1>&2
	exit 1
fi

# Header order is kept as-is on purpose; the output path is quoted so file
# names containing whitespace work.
curl "$1" \
	-H 'Accept-Encoding: gzip, deflate, br' \
	-H 'Accept-Language: en-US,en;q=0.8' \
	-H 'Upgrade-Insecure-Requests: 1' \
	-H "User-Agent: $useragent" \
	-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' \
	-H 'Connection: keep-alive' \
	-H 'Cache-Control: max-age=0' \
	--compressed > "$2"
|
1
fetchers/chrome_agents
Normal file
1
fetchers/chrome_agents
Normal file
@ -0,0 +1 @@
|
||||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36
|
11
fetchers/firefox
Executable file
11
fetchers/firefox
Executable file
@ -0,0 +1,11 @@
|
||||
#!/bin/sh
# Tries more or less to look like Firefox: fetches a URL with curl while
# sending a Firefox-like set of request headers.
#
# Usage: firefox url output
#   url    - address to fetch
#   output - file the response body is written to
# Environment:
#   RANDRSS_ROOT - directory that contains fetchers/firefox_agents
# Exit status:
#   1 on wrong usage or when no user agent could be picked,
#   otherwise the exit status of curl.

# Trace every command to stderr.
set -x

if [ $# -ne 2 ] ; then
	echo "usage: $0 url output" 1>&2
	exit 1
fi

agentsfile="$RANDRSS_ROOT/fetchers/firefox_agents"

# TODO: better randomize
# Pick one random line of the agent list; an unreadable list leaves the
# variable empty and is reported below.
useragent=$(shuf -n 1 "$agentsfile") || useragent=""

# Tolerate list entries that carry the header name themselves, so the
# header is never sent as "User-Agent: User-Agent: ...".
useragent=${useragent#User-Agent: }

# Without an agent curl would drop the User-Agent header entirely, which
# makes the request stand out; refuse to fetch instead.
if [ -z "$useragent" ] ; then
	echo "$0: could not pick a user agent from $agentsfile" 1>&2
	exit 1
fi

# Header order is kept as-is on purpose; the output path is quoted so file
# names containing whitespace work.
curl "$1" \
	-H "User-Agent: $useragent" \
	-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
	-H 'Accept-Language: en-US,en;q=0.5' \
	-H 'Accept-Encoding: gzip, deflate, br' \
	--compressed \
	-H 'Connection: keep-alive' \
	-H 'Upgrade-Insecure-Requests: 1' > "$2"
|
2
fetchers/firefox_agents
Normal file
2
fetchers/firefox_agents
Normal file
@ -0,0 +1,2 @@
|
||||
Mozilla/5.0 (X11; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0
|
||||
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0
|
2
fetcherslist
Normal file
2
fetcherslist
Normal file
@ -0,0 +1,2 @@
|
||||
fetchers/firefox
|
||||
fetchers/chrome
|
23
randrss
23
randrss
@ -1,21 +1,28 @@
|
||||
#!/bin/bash
|
||||
set -x
|
||||
set -e
|
||||
#TODO: make this more robust
|
||||
export RANDRSS_ROOT=$(pwd)
|
||||
random_default=$( shuf -n 1 -i720-753)
|
||||
DEFAULT_PER_ITEM="1-$random_default"
|
||||
|
||||
echo "Current default sleep seconds range: $DEFAULT_PER_ITEM"
|
||||
if [ $# -lt 1 ] ; then
|
||||
echo "Usage: $0 inputfile [user agents] [syncnow]"
|
||||
echo "Usage: $0 inputfile fetchersfile [syncnow]"
|
||||
exit
|
||||
fi
|
||||
inputfile="$1"
|
||||
useragentsfile="$2"
|
||||
fetchersfile="$2"
|
||||
if [ ! -f "$inputfile" ] ; then
|
||||
echo "inputfile does not exist or is not readable" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "$fetchersfile" ] ; then
|
||||
echo "fetchersfile does not exist or is not readable" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
syncnow=0
|
||||
if [ "$3" = "syncnow" ] ; then
|
||||
syncnow=1
|
||||
@ -31,13 +38,11 @@ while true ; do
|
||||
else
|
||||
sleepfor=$( shuf -i "$DEFAULT_PER_ITEM" -n 1)
|
||||
fi
|
||||
useragent=""
|
||||
if [ -n "$useragentsfile" ] ; then
|
||||
useragent=$( shuf -n 1 "$useragentsfile" )
|
||||
fi
|
||||
echo "Sleeping for $sleepfor seconds for $url"
|
||||
[ $syncnow -eq 1 ] || sleep "$sleepfor"
|
||||
torsocks wget "$url" -U "$useragent" -O "$output" || echo "Failed to fetch $url"
|
||||
fetcher=$( shuf -n 1 "$fetchersfile" )
|
||||
|
||||
[ $syncnow -eq 1 ] || ( echo "Sleeping for $sleepfor seconds for $url, chosen fetcher $fetcher" && sleep "$sleepfor" )
|
||||
echo "Fetching $url with $fetcher"
|
||||
torsocks ./$fetcher "$url" "$output" || echo "Failed to fetch $url"
|
||||
done
|
||||
[ $syncnow -eq 1 ] && exit
|
||||
done
|
||||
|
بارگذاری…
x
مرجع در شماره جدید
Block a user