gh-starred-to-opml/gh-starred-to-opml.sh

233 lines
8 KiB
Bash

#!/usr/bin/env bash
# extglob is needed for the *(...) and +(...) patterns used when the
# output filename is derived from a previous OPML file.
shopt -s extglob
# Pin the locale and take the timezone from the system's localtime file.
export LC_ALL="C.UTF-8" TZ=:/etc/localtime
#######################################
# Print the usage text of the script.
# Arguments: none
# Outputs:   the help text, one line per entry, on stdout
#######################################
Help() {
  printf '%s\n' \
    'Generate an OPML 2.0 file to follow releases of starred repositories on Github' \
    'Default to all starred repos of the user, or a specific list with [-l list].' \
    'Uses the Github API to check lists and if the starred repos contain Release (preferred) or Tag entries.' \
    '' \
    'Syntax: ./gh-starred-to-opml.sh [-h] -u user [-l listname] [-d date] [-o filename] [-n filename] -t token' \
    'options:' \
    '-h Print this Help' \
    '-u string required: Github username' \
    '-l string optional: Github Stars List (url shortname)' \
    '-d string optional: ISO8601 date to filter out starred before (YYYY-mm-ddTHH:MM:SSZ)' \
    '-o string.opml optional: destination opml filename' \
    '-n string.opml Use a previous file to generate updates,' \
    ' it does not require any other option,' \
    ' but it uses starred_at dates, not starred in a list.' \
    '-t string required: Github API token to avoid rate limits.' \
    ''
}
# Command-line parsing. Start from a clean slate so values inherited from the
# environment cannot leak into the run.
# NOTE(review): _GITHUBTOKEN is not cleared here, so an exported value is
# honoured when -t is omitted — confirm this is intended.
unset -v _USERNAME _LISTNAME _DATEFILTER _FILENAME _OLDFILENAME _DATA
# The leading ':' puts getopts in silent mode: an unknown option is reported
# as '?' and a missing argument as ':', both with the offending option letter
# in OPTARG. Without it the ':' arm is unreachable and OPTARG is empty in the
# '?' arm, so the messages below would never name the option.
while getopts ":hu:l:d:o:n:t:" option; do
  case $option in
    h) Help; exit;;
    u) _USERNAME="$OPTARG";;
    l) _LISTNAME="$OPTARG";;
    d) _DATEFILTER="$OPTARG";;
    o) _FILENAME="$OPTARG";;
    n) _OLDFILENAME="$OPTARG"
      # update mode derives everything from the previous file, so options
      # given before -n are discarded
      unset -v _USERNAME _LISTNAME _DATEFILTER _FILENAME _DATA;;
    t) _GITHUBTOKEN="$OPTARG";;
    \?) echo -e "Unknown option: -$OPTARG \n" >&2; Help; exit 1;;
    : ) echo -e "Missing argument for -$OPTARG \n" >&2; Help; exit 1;;
    * ) echo -e "Unimplemented option: -$option \n" >&2; Help; exit 1;;
  esac
done
# Update mode (-n): derive username, list name, date filter and the new
# output filename from a previously generated OPML file.
if [[ -n $_OLDFILENAME ]]; then
  # parsing the source OPML file to generate values for the update
  if [[ ! -f "${_OLDFILENAME}" ]]; then
    echo "Missing source file ${_OLDFILENAME}" >&2
    echo
    Help
    exit 1
  fi
  # Replace a trailing "_<digits>.opml" and/or ".opml" suffix with a fresh
  # timestamp suffix (the *(...) / +(...) patterns require extglob).
  _FILENAME=${_OLDFILENAME/%*(_+([0-9]).opml)*(.opml)/_$(date '+%y%m%d%H%M').opml}
  # Only the first matching line is considered (-m 1): the header fields and
  # the category outline precede the feed entries, whose free-text
  # descriptions could otherwise match the same patterns.
  _USERNAME=$(grep -m 1 "ownerName" "${_OLDFILENAME}" | sed -nr 's,.+Name>(.+)<\/ownerName.+,\1,p')
  _LISTNAME=$(grep -m 1 "outline" "${_OLDFILENAME}" | sed -nr 's,.+Github - (.+) - .+,\1,p')
  # Emit a literal 'Z' suffix: with -u, %Z prints "UTC", which does not match
  # the "...Z" timestamps the date filter is compared against as strings and
  # would let the most recently starred repository through again.
  _DATEFILTER=$(LC_ALL="C.UTF-8" date -u -d "$(grep -m 1 "dateModified" "${_OLDFILENAME}" | sed -nr 's,.+dateModified>(.+)<\/dateModified.+,\1,p')" '+%Y-%m-%dT%H:%M:%SZ')
fi
# Report a missing mandatory value on stderr, show the usage text and stop.
# Arguments: $1 - message describing what is missing
_abort_with_help() {
  echo "$1" >&2
  echo
  Help
  exit 1
}

# The username and the API token are both mandatory.
[[ -n $_USERNAME ]] || _abort_with_help 'Missing Github username'
[[ -n $_GITHUBTOKEN ]] || _abort_with_help 'Missing Github API token'

# Default output filename: gh_starred_<user>[_<list>].opml
if [[ -z $_FILENAME ]]; then
  _FILENAME="gh_starred_${_USERNAME}${_LISTNAME:+_${_LISTNAME}}.opml"
fi
# External tools the script shells out to; stop at the first one missing.
_CMD_ARRAY=( curl jq pup )
for _tool in "${_CMD_ARRAY[@]}"; do
  command -v "$_tool" > /dev/null && continue
  {
    echo "Requirements: $_tool could not be found"
    echo ' - sudo apt install curl jq'
    echo ' - go install github.com/ericchiang/pup@latest'
  } >&2
  exit 1
done
# TERM handler for the main shell: ghapirequest sends TERM to MAIN_PID when
# the API reports a rate-limit error, which ends the whole run here.
_on_api_abort() {
  echo '⯅ Github API rate limit exceeded or Token issue.'
  exit 1
}
trap _on_api_abort TERM
# PID of the main shell, exported so it is visible where the signal is sent.
MAIN_PID=$$
export MAIN_PID
#######################################
# Request to the Github API, aborting the main script on rate-limit or
# token errors.
# Globals:   _GITHUBTOKEN (read), MAIN_PID (read)
# Arguments: $1 - path and query string appended to https://api.github.com/
# Outputs:   the raw response body on stdout; nothing when the run is aborted
# Returns:   1 after signalling the main shell, 0 otherwise
#######################################
ghapirequest() {
  local response
  response=$(curl \
    --max-time 120 \
    -H "Authorization: token ${_GITHUBTOKEN}" \
    -H "Accept: application/vnd.github.v3.star+json" \
    -sSL "https://api.github.com/$1")
  # The TERM trap of the main shell announces "rate limit exceeded or Token
  # issue", so a rejected token ("Bad credentials" message) must trigger it
  # as well instead of being handed to the caller as if it were data.
  if grep -Eq 'API rate limit exceeded|"message"[[:space:]]*:[[:space:]]*"Bad credentials"' <<< "${response}"; then
    kill -s TERM "${MAIN_PID}"
    return 1
  fi
  printf '%s\n' "${response}"
}
#######################################
# Fetch a page from the Github website (used for the Stars Lists).
# Arguments: $1 - path and query string appended to https://github.com/
# Outputs:   the response body on stdout
#######################################
ghrequest() {
  local page_path=$1
  curl --max-time 120 -sSL "https://github.com/${page_path}"
}
# parsing Github API for all user's starred repositories
# Pages of 100 entries are requested until one comes back empty. Each entry
# is reduced to {id, name, desc, date} and appended to _DATA as a stream of
# JSON objects.
_PAGE=1
_DATA=""
while :; do
  unset _NEW_DATA
  echo "- Parsing Github API for ${_USERNAME}: page $_PAGE"
  _NEW_DATA=$(ghapirequest "users/${_USERNAME}/starred?page=${_PAGE}&per_page=100" | jq '.[] | {id: .repo.id, name: .repo.full_name, desc: .repo.description, date: .starred_at}')
  if [[ -z $_NEW_DATA ]]; then
    echo "- Page ${_PAGE} was the last one"
    break
  fi
  # Join with a real newline. `echo -e` must not be used here: it would
  # expand the JSON escapes (\n, \t, \\ ...) inside descriptions and corrupt
  # the accumulated JSON.
  _DATA+="${_DATA:+$'\n'}${_NEW_DATA}"
  _PAGE=$((_PAGE+1))
done
_PAGE=1
# Order the collected entries by repository name, one compact object per line.
echo "- Sorting results by name"
_DATA=$(jq -s -c 'sort_by(.name) | .[]' <<< "${_DATA}")
# Optional date filter: keep entries whose starred date compares greater
# than the given date.
if [[ -n ${_DATEFILTER} ]]; then
  echo "- Filtering out repositories starred before ${_DATEFILTER}"
  _DATA=$(jq -c --arg date "${_DATEFILTER}" 'select( .date > $date )' <<< "${_DATA}")
fi
# parsing Github list pages (not available in the API)
# The list is scraped from the website: every page contributes the hrefs of
# its repositories, and the presence of a "next_page" link decides whether
# another page is requested.
if [[ -n ${_LISTNAME} ]]; then
  _PAGE=1
  echo "- Parsing Github \"${_LISTNAME}\" List for ${_USERNAME}: page $_PAGE"
  _LIST=$(ghrequest "stars/${_USERNAME}/lists/${_LISTNAME}?page=1")
  _NEXT=$(pup '#user-list-repositories div div a[class="next_page"] text{}' <<< "${_LIST}")
  _LIST=$(pup '#user-list-repositories div div h3 a attr{href}' <<< "${_LIST}")
  [[ -z ${_NEXT} ]] || _PAGE=$((_PAGE + 1))
  # _PAGE is reset to 1 on the last page, which ends the loop
  until (( _PAGE < 2 )); do
    unset _NEW_LIST _NEXT
    echo "- Parsing Github \"${_LISTNAME}\" List for ${_USERNAME}: page $_PAGE"
    _NEW_LIST=$(ghrequest "stars/${_USERNAME}/lists/${_LISTNAME}?page=${_PAGE}")
    _NEXT=$(pup '#user-list-repositories div div a[class="next_page"] text{}' <<< "${_NEW_LIST}")
    _NEW_LIST=$(pup '#user-list-repositories div div h3 a attr{href}' <<< "${_NEW_LIST}")
    _LIST=$(echo -e "${_LIST}\n${_NEW_LIST}")
    if [[ -z ${_NEXT} ]]; then
      echo "- Page ${_PAGE} was the last one"
      _PAGE=1
    else
      _PAGE=$((_PAGE + 1))
    fi
  done
  # hrefs look like "/owner/repo"; drop the leading slash
  _LIST=$(sed -r 's,^\/,,g' <<< "${_LIST}")
fi
# filtering down repositories by the List
# The filter is keyed on the requested list name rather than on the scraped
# result: if the list page yielded no repositories (unknown list, markup
# change, network error), abort instead of silently writing every starred
# repository under the list's category.
if [[ -n ${_LISTNAME} ]]; then
  if [[ -z ${_LIST} ]]; then
    echo "No repositories found in the \"${_LISTNAME}\" List of ${_USERNAME}. Aborting." >&2
    echo
    exit 1
  fi
  echo "- Filtering all starred repositories by the ones in the list."
  # $names is a JSON array with one "owner/repo" string per non-empty line
  _DATA=$(echo "${_DATA}" | jq --argjson names "$(echo "${_LIST}" | jq -R 'select(length > 0)' | jq -s .)" 'select( .name as $name | $names | index($name) )')
fi
if [[ "$(echo "${_DATA}" | jq '.name' | wc -l)" == "0" ]]; then
  echo 'No entries found. Aborting.' >&2
  echo
  exit 1
fi
# opml file generation
echo "- Generating OPML file Header"
# Earliest and latest starred dates of the selection, formatted in GMT for
# the dateCreated / dateModified header fields.
_OLDEST_STAR=$(jq -sr 'sort_by(.date) | reverse[-1].date' <<< "${_DATA}")
_NEWEST_STAR=$(jq -sr 'sort_by(.date)[-1].date' <<< "${_DATA}")
_FIRSTDATE=$(LC_ALL="C.UTF-8" TZ=GMT date -d "${_OLDEST_STAR}" '+%a, %d %b %Y %H:%M:%S %Z')
_LASTDATE=$(LC_ALL="C.UTF-8" TZ=GMT date -d "${_NEWEST_STAR}" '+%a, %d %b %Y %H:%M:%S %Z')
# Category label: "Github - <list> - <user>", or "Github - <user>" without a list
_CATEGORY="Github - ${_LISTNAME:+${_LISTNAME} - }${_USERNAME}"
# Create (or truncate) the output file with the OPML preamble.
printf '%s\n' \
  '<?xml version="1.0" encoding="UTF-8"?>' \
  '<opml version="2.0">' \
  '<head>' \
  "<title>${_FILENAME}</title>" \
  "<dateCreated>${_FIRSTDATE}</dateCreated>" \
  "<dateModified>${_LASTDATE}</dateModified>" \
  "<ownerName>${_USERNAME}</ownerName>" \
  '</head>' \
  '<body>' \
  "<outline text=\"${_CATEGORY}\">" \
  > "${_FILENAME}"
# Tell whether a repository has at least one entry of the given kind.
# Arguments: $1 - repository full name (owner/repo), $2 - "releases" or "tags"
# Returns:   0 when the API answers with a non-empty array, non-zero otherwise
#            (error objects and empty responses count as "no entries")
_gh_has_entries() {
  # one entry is enough to decide, hence per_page=1
  ghapirequest "repos/$1/$2?per_page=1" \
    | jq -e 'type == "array" and length > 0' > /dev/null
}

echo "- Generating OPML file Feeds"
# One <outline> per repository. The feed points at the releases Atom feed
# when the repository publishes releases, falls back to the tags feed when it
# only has tags, and defaults to releases when it has neither.
for id in $(echo "$_DATA" | jq .id); do
  _REPO_NAME=$(echo "$_DATA" | jq -r --argjson v "$id" 'select(.id==$v) | .name')
  # `// ""` keeps a repository without description from being written as the
  # literal text "null"; sed then escapes the XML special characters
  _REPO_DESC=$(echo "$_DATA" | jq -r --argjson v "$id" 'select(.id==$v) | (.desc // "")' | sed -e 's~\&~\&amp;~g' -e 's~<~\&lt;~g' -e 's~>~\&gt;~g' -e 's~\"~\&quot;~g' -e "s~'~\&apos;~g")
  _TYPE="releases"
  # Tags are only looked up when there is no release (the previous check
  # read a misspelled, always-empty variable, so tags always won).
  if ! _gh_has_entries "${_REPO_NAME}" releases && _gh_has_entries "${_REPO_NAME}" tags; then
    _TYPE="tags"
  fi
  printf '<outline title="%s" text="%s" type="rss" version="ATOM1" description="%s" xmlUrl="https://github.com/%s/%s.atom" htmlUrl="https://github.com/%s"></outline>\n' \
    "${_REPO_NAME}" "${_REPO_NAME}" "${_REPO_DESC}" "${_REPO_NAME}" "${_TYPE}" "${_REPO_NAME}" \
    >> "${_FILENAME}"
done
echo "- Generating OPML file Footer"
# Close the category outline, the body and the document.
printf '%s\n' '</outline>' '</body>' '</opml>' >> "${_FILENAME}"
# Final summary: number of entries written, category label and output file.
_ENTRY_COUNT=$(jq '.name' <<< "${_DATA}" | wc -l)
echo "- Done: Generated ${_ENTRY_COUNT} entries for \"${_CATEGORY}\" in ${_FILENAME}"