Added fetchers concept: separate scripts to fetch the feeds
Fetchers claim to be a certain client and try to send the same headers as the original client does. That is better than a simple curl request with a fake user agent, because curl does not send the other headers that the original client sends, and therefore its traffic stands out.
Этот коммит содержится в:
23
randrss
23
randrss
@@ -1,21 +1,28 @@
|
||||
#!/bin/bash
|
||||
set -x
|
||||
set -e
|
||||
#TODO: make this more robust
|
||||
export RANDRSS_ROOT=$(pwd)
|
||||
random_default=$( shuf -n 1 -i720-753)
|
||||
DEFAULT_PER_ITEM="1-$random_default"
|
||||
|
||||
echo "Current default sleep seconds range: $DEFAULT_PER_ITEM"
|
||||
if [ $# -lt 1 ] ; then
|
||||
echo "Usage: $0 inputfile [user agents] [syncnow]"
|
||||
echo "Usage: $0 inputfile fetchersfile [syncnow]"
|
||||
exit
|
||||
fi
|
||||
inputfile="$1"
|
||||
useragentsfile="$2"
|
||||
fetchersfile="$2"
|
||||
if [ ! -f "$inputfile" ] ; then
|
||||
echo "inputfile does not exist or is not readable" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "$fetchersfile" ] ; then
|
||||
echo "fetchersfile does not exist or is not readable" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
syncnow=0
|
||||
if [ "$3" = "syncnow" ] ; then
|
||||
syncnow=1
|
||||
@@ -31,13 +38,11 @@ while true ; do
|
||||
else
|
||||
sleepfor=$( shuf -i "$DEFAULT_PER_ITEM" -n 1)
|
||||
fi
|
||||
useragent=""
|
||||
if [ -n "$useragentsfile" ] ; then
|
||||
useragent=$( shuf -n 1 "$useragentsfile" )
|
||||
fi
|
||||
echo "Sleeping for $sleepfor seconds for $url"
|
||||
[ $syncnow -eq 1 ] || sleep "$sleepfor"
|
||||
torsocks wget "$url" -U "$useragent" -O "$output" || echo "Failed to fetch $url"
|
||||
fetcher=$( shuf -n 1 "$fetchersfile" )
|
||||
|
||||
[ $syncnow -eq 1 ] || ( echo "Sleeping for $sleepfor seconds for $url, chosen fetcher $fetcher" && sleep "$sleepfor" )
|
||||
echo "Fetching $url with $fetcher"
|
||||
torsocks ./$fetcher "$url" "$output" || echo "Failed to fetch $url"
|
||||
done
|
||||
[ $syncnow -eq 1 ] && exit
|
||||
done
|
||||
|
Ссылка в новой задаче
Block a user