summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/analysis-notebook-test-binder.yml108
-rw-r--r--.gitignore4
-rw-r--r--README.md28
-rw-r--r--bin/run_notebooks.py67
-rw-r--r--environment.yml10
-rw-r--r--setup/environment.yml128
-rw-r--r--setup/setup.sh2
-rw-r--r--setup/setup_conda.sh2
-rw-r--r--setup/teardown.sh2
-rw-r--r--setup/teardown_conda.sh2
10 files changed, 218 insertions, 135 deletions
diff --git a/.github/workflows/analysis-notebook-test-binder.yml b/.github/workflows/analysis-notebook-test-binder.yml
new file mode 100644
index 0000000..e0185c0
--- /dev/null
+++ b/.github/workflows/analysis-notebook-test-binder.yml
@@ -0,0 +1,108 @@
+# Builds the repository image with repo2docker and runs the analysis notebook inside it
+
+name: analysis-notebook-test-binder
+
+# Controls when the action will run. Triggers the workflow on push or pull request
+# events for the main branch, and on a weekly schedule (Sundays at 04:05 UTC)
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ schedule:
+ # * is a special character in YAML so you have to quote this string
+ - cron: '5 4 * * 0'
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+ build:
+ # The type of runner that the job will run on
+ runs-on: ubuntu-latest
+ outputs:
+ IMAGE_SHA_NAME: ${{ steps.install-using-repo2docker.outputs.IMAGE_SHA_NAME }}
+ OUT_IMGNAME: ${{ steps.package-build.outputs.OUT_IMGNAME }}
+ strategy:
+ matrix:
+ # A single test is sufficient because the other notebooks are already
+ # tested in the manual install and the timeline notebooks are almost
+ # identical
+ target: [ui_security_inventory_23_parsing.ipynb]
+
+ # Steps represent a sequence of tasks that will be executed as part of the job
+ steps:
+ # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+ - name: Checkout
+   # v2 runs on the retired Node.js 12 runtime; v4 is the maintained release
+   uses: actions/checkout@v4
+
+ - name: Create a docker image using repo2docker
+ id: install-using-repo2docker
+ uses: shankari/repo2docker-action@master
+ with:
+ IMAGE_NAME: "toriis"
+
+ - name: Verify repo2docker environment
+   run: |
+     # Image reference reported by the repo2docker build step
+     IMAGE="${{ steps.install-using-repo2docker.outputs.IMAGE_SHA_NAME }}"
+     echo "image SHA was $IMAGE"
+     echo "Listing all images"
+     docker image list
+
+     echo "Checking to see if toriis exists"
+     docker image list | grep toriis | wc -l
+     echo "Checking to see if repo2docker creation image exists"
+     docker image list | grep repo2docker | wc -l
+
+     # Query each tool once and reuse the captured value, instead of running
+     # every probe twice (once to print it, once to capture it).
+     echo "Reading versions of related software"
+     echo "---- DOCKER ----"
+     CURR_DOCKER_VER=$(docker --version)
+     echo "$CURR_DOCKER_VER"
+     echo "---- CONDA ----"
+     CURR_CONDA_VER=$(docker run "$IMAGE" /srv/conda/bin/conda --version | cut -d " " -f 2)
+     echo "$CURR_CONDA_VER"
+     echo "---- repo2docker----"
+     CURR_R2D_VER=$(docker run jupyter/repo2docker:master pip3 list | grep jupyter-repo2docker)
+     echo "$CURR_R2D_VER"
+     echo "On checking, docker ver is $CURR_DOCKER_VER"
+     echo " conda ver is $CURR_CONDA_VER and"
+     echo " repo2docker ver is $CURR_R2D_VER"
+
+ - name: Start the recently built docker container
+ run: |
+ echo "image SHA was ${{ steps.install-using-repo2docker.outputs.IMAGE_SHA_NAME }}"
+ docker run --name toriis_test -d -p 8888:8888 ${{ steps.install-using-repo2docker.outputs.IMAGE_SHA_NAME }} jupyter notebook --ip 0.0.0.0
+
+ - name: Test the interactive timeline
+ run: |
+ echo "About to run notebooks"
+ docker exec toriis_test python bin/run_notebooks.py ${{ matrix.target }}
+ echo "After running the notebooks, checking container list"
+ docker container list
+
+ - name: Copy the failure output
+   id: copy-failure-output
+   # Only runs after an earlier step failed, to retrieve the executed notebook
+   if: failure()
+   run: |
+     # Map <name>.ipynb to <name>_out.ipynb; the dot is escaped and the match
+     # anchored so only the file extension is rewritten
+     OUT_FILENAME=$(echo "${{ matrix.target }}" | sed 's/\.ipynb$/_out.ipynb/')
+     echo "output filename = $OUT_FILENAME"
+
+     echo "About to copy file from docker"
+     docker exec toriis_test ls
+     DOCKER_WD=$(docker exec toriis_test pwd)
+     echo "DOCKER_WD=$DOCKER_WD"
+     docker exec toriis_test ls "$DOCKER_WD"
+     docker exec toriis_test ls "$DOCKER_WD/$OUT_FILENAME"
+     docker cp "toriis_test:$DOCKER_WD/$OUT_FILENAME" /tmp
+     ls -al "/tmp/$OUT_FILENAME"
+     # The ::set-output workflow command is deprecated; step outputs are now
+     # written to the file named by $GITHUB_OUTPUT
+     echo "OUT_FILENAME=${OUT_FILENAME}" >> "$GITHUB_OUTPUT"
+
+ - name: Upload result for the interactive timeline
+   if: failure()
+   # upload-artifact v1 has been retired; v4 takes the same name/path inputs
+   uses: actions/upload-artifact@v4
+   with:
+     name: ${{ steps.copy-failure-output.outputs.OUT_FILENAME }}
+     path: /tmp/${{ steps.copy-failure-output.outputs.OUT_FILENAME }}
+
+ - name: Shutdown the recently built docker container
+   # Clean up even when an earlier step failed
+   if: always()
+   run: |
+     echo "Running docker container list"
+     docker container list
+     # The container was started with --name toriis_test; "toriis" is the
+     # image name, so stopping "toriis" fails with "No such container"
+     docker stop toriis_test
diff --git a/.gitignore b/.gitignore
index 76605c9..3ff48d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,9 @@
# Jupyter Notebook
.ipynb_checkpoints
+# Directories to ignore
+.conda
+.idea
+
# Private Data
data/FFIS-CU200-2021-Q4.xlsx
diff --git a/README.md b/README.md
index e135132..56e51d9 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,29 @@
+[![analysis-notebook-test-binder](https://github.com/toriis-portal/toriis-analysis/actions/workflows/analysis-notebook-test-binder.yml/badge.svg)](https://github.com/toriis-portal/toriis-analysis/actions/workflows/analysis-notebook-test-binder.yml)
# toriis-analysis
-Repository for scripts to evaluate public institutional investment data.
+Repository for scripts to evaluate public institutional investment data. These scripts are designed to be launched with [binder](https://mybinder.org/) so that other community members can run their own analyses without any additional setup.
+
+
+
+[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/toriis-portal/toriis-analysis/HEAD)
# Running existing notebook
-1)
- + Fork + clone repo
- + Run `setup/setup.sh` to set up the local conda environment
+1. **Interactive, UI-only:** Launch the repo in binder and clone one of the example notebooks
+1. **Interactive, CLI only:**
+ 1. Fork + clone repo
+ 1. Run `setup/setup.sh` to set up the local conda environment
- you may need to install the correct version of miniconda
- + Start a local notebook suerver (`juypter notebook`)
+ 1. Start a local notebook server (`jupyter notebook`)
# Contributing
You are welcome to contribute data and analysis results. Please make sure that you contribute **cleared notebooks** so version control capabilities work well. To clear a notebook, use `Kernel -> Restart & Clear Output`
-1) **CLI:**
- + Create a new branch (`$ git checkout -b <branch_name>`)
- + Commit the new notebook/data (`$ git add <new file>; git commit`)
- + Push and generate a pull request
+1. **UI-only:**
+ 1. Download the notebook (Download -> ipynb)
+ 1. Upload the notebook using the GitHub UI (Upload Files, next to Clone or Download)
+1. **CLI only:** Follow the [instructions on github](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) - i.e.
+ 1. Create a new branch (e.g. `$ git checkout -b <branch_name>`)
+ 1. Commit the new notebook (e.g. `$ git add` and `$ git commit`)
+ 1. Push and generate pull request
+
diff --git a/bin/run_notebooks.py b/bin/run_notebooks.py
new file mode 100644
index 0000000..ea7e8ee
--- /dev/null
+++ b/bin/run_notebooks.py
@@ -0,0 +1,67 @@
+# ! python
+# coding: utf-8
+# From https://gist.github.com/tpogden/ec79f2ebe2baf45655445b575dc7f540
+# with some more context at https://ogden.eu/run-notebooks
+# Thanks to @tpogden for his gist
+
+import os
+import sys
+import argparse
+import glob
+
+import nbformat
+from nbconvert.preprocessors import ExecutePreprocessor
+from nbconvert.preprocessors.execute import CellExecutionError
+
+# Parse args
+parser = argparse.ArgumentParser(description="Runs a set of Jupyter \
+ notebooks.")
+file_text = """ Notebook file(s) to be run, e.g. '*.ipynb' (default),
+'my_nb1.ipynb', 'my_nb1.ipynb my_nb2.ipynb', 'my_dir/*.ipynb'
+"""
+parser.add_argument('file_list', metavar='F', type=str, nargs='*',
+ help=file_text)
+parser.add_argument('-t', '--timeout', help='Length of time (in secs) a cell \
+ can run before raising TimeoutError (default 600).', default=600,
+ required=False)
+parser.add_argument('-p', '--run-path', help='The path the notebook will be \
+ run from (default pwd).', default='.', required=False)
+args = parser.parse_args()
+print('Args:', args)
+if not args.file_list: # Default file_list
+ args.file_list = glob.glob('*.ipynb')
+
+# Check list of notebooks
+notebooks = []
+print('Notebooks to run:')
+for f in args.file_list:
+ # Find notebooks but not notebooks previously output from this script
+ if f.endswith('.ipynb') and not f.endswith('_out.ipynb'):
+ print(f[:-6])
+ notebooks.append(f[:-6]) # Want the filename without '.ipynb'
+
+# Execute notebooks and output
+# Each <name>.ipynb is executed and the resulting notebook object is written
+# to <name>_out.ipynb whether or not execution succeeded. The script exits
+# with status 1 on a cell error and 110 on a timeout.
+num_notebooks = len(notebooks)
+print('*****')
+for i, n in enumerate(notebooks, start=1):
+    n_out = n + '_out'
+    # Open with an explicit encoding rather than the locale default, and
+    # close the input before execution starts
+    with open(n + '.ipynb', encoding='utf-8') as f_in:
+        nb = nbformat.read(f_in, as_version=4)
+    ep = ExecutePreprocessor(timeout=int(args.timeout), kernel_name='python3')
+    try:
+        # i is 1-based so progress reads "1 / N" through "N / N"
+        print('Running', n, ':', i, '/', num_notebooks)
+        ep.preprocess(nb, {'metadata': {'path': args.run_path}})
+    except CellExecutionError:
+        msg = 'Error executing the notebook "%s".\n' % n
+        msg += 'See notebook "%s" for the traceback.' % n_out
+        print(msg)
+        sys.exit(1)
+    except TimeoutError:
+        msg = 'Timeout executing the notebook "%s".\n' % n
+        print(msg)
+        sys.exit(110)
+    finally:
+        # Runs before sys.exit() propagates, so the output is always saved
+        with open(n_out + '.ipynb', mode='wt', encoding='utf-8') as f_out:
+            nbformat.write(nb, f_out)
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..9898f22
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,10 @@
+name: toriis
+channels:
+ - conda-forge
+ - defaults
+dependencies:
+ - numpy=1.23.5
+ - pandas=1.5.3
+ - requests=2.28.2
+ - yfinance=0.2.12=pyhd8ed1ab_0
+ - openpyxl=3.1.1 \ No newline at end of file
diff --git a/setup/environment.yml b/setup/environment.yml
index c72a998..9898f22 100644
--- a/setup/environment.yml
+++ b/setup/environment.yml
@@ -1,126 +1,10 @@
name: toriis
channels:
+ - conda-forge
- defaults
dependencies:
- - _libgcc_mutex=0.1=main
- - _openmp_mutex=5.1=1_gnu
- - anyio=3.5.0=py310h06a4308_0
- - argon2-cffi=21.3.0=pyhd3eb1b0_0
- - argon2-cffi-bindings=21.2.0=py310h7f8727e_0
- - asttokens=2.0.5=pyhd3eb1b0_0
- - attrs=22.1.0=py310h06a4308_0
- - babel=2.11.0=py310h06a4308_0
- - backcall=0.2.0=pyhd3eb1b0_0
- - beautifulsoup4=4.11.1=py310h06a4308_0
- - blas=1.0=mkl
- - bleach=4.1.0=pyhd3eb1b0_0
- - bottleneck=1.3.5=py310ha9d4c09_0
- - brotlipy=0.7.0=py310h7f8727e_1002
- - bzip2=1.0.8=h7b6447c_0
- - ca-certificates=2023.01.10=h06a4308_0
- - certifi=2022.12.7=py310h06a4308_0
- - cffi=1.15.1=py310h5eee18b_3
- - charset-normalizer=2.0.4=pyhd3eb1b0_0
- - comm=0.1.2=py310h06a4308_0
- - cryptography=38.0.4=py310h9ce1e76_0
- - debugpy=1.5.1=py310h295c915_0
- - decorator=5.1.1=pyhd3eb1b0_0
- - defusedxml=0.7.1=pyhd3eb1b0_0
- - entrypoints=0.4=py310h06a4308_0
- - executing=0.8.3=pyhd3eb1b0_0
- - flit-core=3.6.0=pyhd3eb1b0_0
- - icu=58.2=he6710b0_3
- - idna=3.4=py310h06a4308_0
- - intel-openmp=2021.4.0=h06a4308_3561
- - ipykernel=6.19.2=py310h2f386ee_0
- - ipython=8.9.0=py310h06a4308_0
- - ipython_genutils=0.2.0=pyhd3eb1b0_1
- - jedi=0.18.1=py310h06a4308_1
- - jinja2=3.1.2=py310h06a4308_0
- - json5=0.9.6=pyhd3eb1b0_0
- - jsonschema=4.16.0=py310h06a4308_0
- - jupyter_client=7.4.9=py310h06a4308_0
- - jupyter_core=5.1.1=py310h06a4308_0
- - jupyter_server=1.23.4=py310h06a4308_0
- - jupyterlab=3.5.3=py310h06a4308_0
- - jupyterlab_pygments=0.1.2=py_0
- - jupyterlab_server=2.16.5=py310h06a4308_0
- - ld_impl_linux-64=2.38=h1181459_1
- - libffi=3.4.2=h6a678d5_6
- - libgcc-ng=11.2.0=h1234567_1
- - libgomp=11.2.0=h1234567_1
- - libsodium=1.0.18=h7b6447c_0
- - libstdcxx-ng=11.2.0=h1234567_1
- - libuuid=1.41.5=h5eee18b_0
- - libxml2=2.9.14=h74e7548_0
- - libxslt=1.1.35=h4e12654_0
- - lxml=4.9.1=py310h1edc446_0
- - markupsafe=2.1.1=py310h7f8727e_0
- - matplotlib-inline=0.1.6=py310h06a4308_0
- - mistune=0.8.4=py310h7f8727e_1000
- - mkl=2021.4.0=h06a4308_640
- - mkl-service=2.4.0=py310h7f8727e_0
- - mkl_fft=1.3.1=py310hd6ae3a3_0
- - mkl_random=1.2.2=py310h00e6091_0
- - nbclassic=0.4.8=py310h06a4308_0
- - nbclient=0.5.13=py310h06a4308_0
- - nbconvert=6.5.4=py310h06a4308_0
- - nbformat=5.7.0=py310h06a4308_0
- - ncurses=6.4=h6a678d5_0
- - nest-asyncio=1.5.6=py310h06a4308_0
- - notebook=6.5.2=py310h06a4308_0
- - notebook-shim=0.2.2=py310h06a4308_0
- - numexpr=2.8.4=py310h8879344_0
- - numpy=1.23.5=py310hd5efca6_0
- - numpy-base=1.23.5=py310h8e6c178_0
- - openssl=1.1.1t=h7f8727e_0
- - packaging=22.0=py310h06a4308_0
- - pandas=1.5.2=py310h1128e8f_0
- - pandocfilters=1.5.0=pyhd3eb1b0_0
- - parso=0.8.3=pyhd3eb1b0_0
- - pexpect=4.8.0=pyhd3eb1b0_3
- - pickleshare=0.7.5=pyhd3eb1b0_1003
- - pip=22.3.1=py310h06a4308_0
- - platformdirs=2.5.2=py310h06a4308_0
- - prometheus_client=0.14.1=py310h06a4308_0
- - prompt-toolkit=3.0.36=py310h06a4308_0
- - psutil=5.9.0=py310h5eee18b_0
- - ptyprocess=0.7.0=pyhd3eb1b0_2
- - pure_eval=0.2.2=pyhd3eb1b0_0
- - pycparser=2.21=pyhd3eb1b0_0
- - pygments=2.11.2=pyhd3eb1b0_0
- - pyopenssl=22.0.0=pyhd3eb1b0_0
- - pyrsistent=0.18.0=py310h7f8727e_0
- - pysocks=1.7.1=py310h06a4308_0
- - python=3.10.9=h7a1cb2a_0
- - python-dateutil=2.8.2=pyhd3eb1b0_0
- - python-fastjsonschema=2.16.2=py310h06a4308_0
- - pytz=2022.7=py310h06a4308_0
- - pyzmq=23.2.0=py310h6a678d5_0
- - readline=8.2=h5eee18b_0
- - requests=2.28.1=py310h06a4308_0
- - send2trash=1.8.0=pyhd3eb1b0_1
- - setuptools=65.6.3=py310h06a4308_0
- - six=1.16.0=pyhd3eb1b0_1
- - sniffio=1.2.0=py310h06a4308_1
- - soupsieve=2.3.2.post1=py310h06a4308_0
- - sqlite=3.40.1=h5082296_0
- - stack_data=0.2.0=pyhd3eb1b0_0
- - terminado=0.17.1=py310h06a4308_0
- - tinycss2=1.2.1=py310h06a4308_0
- - tk=8.6.12=h1ccaba5_0
- - tomli=2.0.1=py310h06a4308_0
- - tornado=6.2=py310h5eee18b_0
- - traitlets=5.7.1=py310h06a4308_0
- - typing-extensions=4.4.0=py310h06a4308_0
- - typing_extensions=4.4.0=py310h06a4308_0
- - tzdata=2022g=h04d1e81_0
- - urllib3=1.26.14=py310h06a4308_0
- - wcwidth=0.2.5=pyhd3eb1b0_0
- - webencodings=0.5.1=py310h06a4308_1
- - websocket-client=0.58.0=py310h06a4308_4
- - wheel=0.37.1=pyhd3eb1b0_0
- - xz=5.2.10=h5eee18b_1
- - zeromq=4.3.4=h2531618_0
- - zlib=1.2.13=h5eee18b_0
-prefix: /home/gabrielkosmacher/miniconda3/envs/toriis
+ - numpy=1.23.5
+ - pandas=1.5.3
+ - requests=2.28.2
+ - yfinance=0.2.12=pyhd8ed1ab_0
+ - openpyxl=3.1.1 \ No newline at end of file
diff --git a/setup/setup.sh b/setup/setup.sh
index d2631f9..6730a4f 100644
--- a/setup/setup.sh
+++ b/setup/setup.sh
@@ -11,4 +11,4 @@ set -e
source setup/checks/check_for_conda.sh
conda env update --name toriis --file setup/environment.yml
-conda activate toriis
+conda activate toriis \ No newline at end of file
diff --git a/setup/setup_conda.sh b/setup/setup_conda.sh
index e7d4e3e..ee4d57f 100644
--- a/setup/setup_conda.sh
+++ b/setup/setup_conda.sh
@@ -19,4 +19,4 @@ else
# Useful for debugging any issues with conda
conda info -a
echo "Successfully installed at $INSTALL_PREFIX. Please run 'source $SOURCE_SCRIPT' in every terminal where you want to use conda"
-fi
+fi \ No newline at end of file
diff --git a/setup/teardown.sh b/setup/teardown.sh
index 308fde8..f8ad631 100644
--- a/setup/teardown.sh
+++ b/setup/teardown.sh
@@ -1,3 +1,3 @@
# taken from mobilitynet-analysis-scripts/setup/teardown.sh
conda activate base
-conda env remove --name toriis
+conda env remove --name toriis \ No newline at end of file
diff --git a/setup/teardown_conda.sh b/setup/teardown_conda.sh
index 2fde7a5..a61f834 100644
--- a/setup/teardown_conda.sh
+++ b/setup/teardown_conda.sh
@@ -6,4 +6,4 @@ if [ -z $EXP_CONDA_VER ]; then
else
INSTALL_PREFIX=$HOME/miniconda-$EXP_CONDA_VER
rm -rf $INSTALL_PREFIX
-fi
+fi \ No newline at end of file