Added fetchers concept: separate scripts to fetch the feeds
Fetchers claim to be a certain client. They try to send the same headers as the original client. That's better than a simple curl request with a fake user agent, because curl doesn't send the other headers like the original client and therefore its traffic stands out.
This commit is contained in:
parent
8a80aa0d6d
commit
3a723b9440
10
fetchers/chrome
Executable file
10
fetchers/chrome
Executable file
@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#Tries more or less to look like Chrome
|
||||||
|
if [ $# -ne 2 ] ; then
|
||||||
|
echo "usage: $0 url output" 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
#better randomize
|
||||||
|
# Pick one random line from the Chrome user-agent list. The path is quoted so
# that a RANDRSS_ROOT containing spaces or glob characters is passed to shuf as
# a single argument.
useragent=$(shuf -n 1 "$RANDRSS_ROOT/fetchers/chrome_agents")
|
||||||
|
|
||||||
|
# Fetch the URL ($1) with a Chrome-like header set and write the body to the
# output path ($2). Header order is kept exactly as before. The redirect target
# is quoted so an output path with whitespace or glob characters is used as-is.
curl "$1" \
  -H 'Accept-Encoding: gzip, deflate, br' \
  -H 'Accept-Language: en-US,en;q=0.8' \
  -H 'Upgrade-Insecure-Requests: 1' \
  -H "User-Agent: $useragent" \
  -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' \
  -H 'Connection: keep-alive' \
  -H 'Cache-Control: max-age=0' \
  --compressed > "$2"
|
1
fetchers/chrome_agents
Normal file
1
fetchers/chrome_agents
Normal file
@ -0,0 +1 @@
|
|||||||
|
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36
|
11
fetchers/firefox
Executable file
11
fetchers/firefox
Executable file
@ -0,0 +1,11 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
set -x
|
||||||
|
#Tries more or less to look like Firefox
|
||||||
|
if [ $# -ne 2 ] ; then
|
||||||
|
echo "usage: $0 url output" 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
#better randomize
|
||||||
|
# Pick one random line from the Firefox user-agent list. The path is quoted so
# that a RANDRSS_ROOT containing spaces or glob characters is passed to shuf as
# a single argument.
useragent=$(shuf -n 1 "$RANDRSS_ROOT/fetchers/firefox_agents")
|
||||||
|
|
||||||
|
# Fetch the URL ($1) with a Firefox-like header set and write the body to the
# output path ($2). Header order is kept exactly as before. The redirect target
# is quoted so an output path with whitespace or glob characters is used as-is.
curl "$1" \
  -H "User-Agent: $useragent" \
  -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
  -H 'Accept-Language: en-US,en;q=0.5' \
  -H 'Accept-Encoding: gzip, deflate, br' \
  --compressed \
  -H 'Connection: keep-alive' \
  -H 'Upgrade-Insecure-Requests: 1' > "$2"
|
2
fetchers/firefox_agents
Normal file
2
fetchers/firefox_agents
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
Mozilla/5.0 (X11; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0
|
||||||
|
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0
|
2
fetcherslist
Normal file
2
fetcherslist
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
fetchers/firefox
|
||||||
|
fetchers/chrome
|
23
randrss
23
randrss
@ -1,21 +1,28 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -x
|
set -x
|
||||||
set -e
|
set -e
|
||||||
|
#TODO: make this more robust
|
||||||
|
# Root directory that the fetcher scripts use to locate their user-agent lists.
# Assign first and export afterwards: `export VAR=$(cmd)` returns the status of
# `export`, so a failing `pwd` would go unnoticed even under `set -e`.
RANDRSS_ROOT=$(pwd)
export RANDRSS_ROOT
|
||||||
random_default=$( shuf -n 1 -i720-753)
|
random_default=$( shuf -n 1 -i720-753)
|
||||||
DEFAULT_PER_ITEM="1-$random_default"
|
DEFAULT_PER_ITEM="1-$random_default"
|
||||||
|
|
||||||
echo "Current default sleep seconds range: $DEFAULT_PER_ITEM"
|
echo "Current default sleep seconds range: $DEFAULT_PER_ITEM"
|
||||||
if [ $# -lt 1 ] ; then
|
if [ $# -lt 1 ] ; then
|
||||||
echo "Usage: $0 inputfile [user agents] [syncnow]"
|
echo "Usage: $0 inputfile fetchersfile [syncnow]"
|
||||||
exit
|
exit
|
||||||
fi
|
fi
|
||||||
inputfile="$1"
|
inputfile="$1"
|
||||||
useragentsfile="$2"
|
fetchersfile="$2"
|
||||||
if [ ! -f "$inputfile" ] ; then
|
if [ ! -f "$inputfile" ] ; then
|
||||||
echo "inputfile does not exist or is not readable" 1>&2
|
echo "inputfile does not exist or is not readable" 1>&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ ! -f "$fetchersfile" ] ; then
|
||||||
|
echo "fetchersfile does not exist or is not readable" 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
syncnow=0
|
syncnow=0
|
||||||
if [ "$3" = "syncnow" ] ; then
|
if [ "$3" = "syncnow" ] ; then
|
||||||
syncnow=1
|
syncnow=1
|
||||||
@ -31,13 +38,11 @@ while true ; do
|
|||||||
else
|
else
|
||||||
sleepfor=$( shuf -i "$DEFAULT_PER_ITEM" -n 1)
|
sleepfor=$( shuf -i "$DEFAULT_PER_ITEM" -n 1)
|
||||||
fi
|
fi
|
||||||
useragent=""
|
fetcher=$( shuf -n 1 "$fetchersfile" )
|
||||||
if [ -n "$useragentsfile" ] ; then
|
|
||||||
useragent=$( shuf -n 1 "$useragentsfile" )
|
[ $syncnow -eq 1 ] || ( echo "Sleeping for $sleepfor seconds for $url, chosen fetcher $fetcher" && sleep "$sleepfor" )
|
||||||
fi
|
echo "Fetching $url with $fetcher"
|
||||||
echo "Sleeping for $sleepfor seconds for $url"
|
# Run the chosen fetcher script through torsocks. The fetcher path is quoted so
# that a fetchers-file entry containing whitespace or glob characters is run as
# one command word. A failed fetch is reported and the loop carries on.
torsocks "./$fetcher" "$url" "$output" || echo "Failed to fetch $url"
|
||||||
[ $syncnow -eq 1 ] || sleep "$sleepfor"
|
|
||||||
torsocks wget "$url" -U "$useragent" -O "$output" || echo "Failed to fetch $url"
|
|
||||||
done
|
done
|
||||||
[ $syncnow -eq 1 ] && exit
|
[ $syncnow -eq 1 ] && exit
|
||||||
done
|
done
|
||||||
|
Loading…
x
Reference in New Issue
Block a user