protein_rag / setup.sh
ericzhang1122's picture
Upload folder using huggingface_hub
5c20520 verified
#!/bin/bash
# ---------------------------------------------------------------------------
# Settings shared by the rest of this installer.
# ---------------------------------------------------------------------------

# Name of the conda environment the tooling is installed into.
CONDA_ENV_NAME='rag_llm'

# InterProScan release to fetch. The extracted directory name, the archive
# file name and the download URL are all derived from this version string.
IPS_VERSION='5.75-106.0'
IPS_DIR="interproscan-${IPS_VERSION}"
IPS_TAR="${IPS_DIR}-64-bit.tar.gz"
IPS_URL="https://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/${IPS_VERSION}/${IPS_TAR}"
# Abort early when the `conda` command cannot be found: the environment,
# Java and BLAST are all installed through it further down.
# Diagnostics are written to stderr, matching the error reporting already
# used by the BLAST database section of this script.
if ! command -v conda > /dev/null 2>&1; then
  echo "Error: conda is not installed or not in PATH" >&2
  echo "Please install Miniconda or Anaconda first" >&2
  exit 1
fi
# Create the conda environment that provides OpenJDK 11 and Python.
echo "Creating conda environment '${CONDA_ENV_NAME}' with OpenJDK 11..."
if ! conda create -y -n "${CONDA_ENV_NAME}" openjdk=11 python; then
  echo "Error: Failed to create conda environment" >&2
  exit 1
fi

# Activate the environment inside this script's shell. conda.sh (shipped
# under the conda base directory) is sourced first so that `conda activate`
# is usable from a non-interactive shell.
echo "Activating conda environment..."
conda_base_dir=$(conda info --base) || {
  echo "Error: Failed to determine the conda base directory" >&2
  exit 1
}
# The path is quoted so a base directory containing spaces still works.
# shellcheck source=/dev/null
if ! source "${conda_base_dir}/etc/profile.d/conda.sh"; then
  echo "Error: Failed to load ${conda_base_dir}/etc/profile.d/conda.sh" >&2
  exit 1
fi
# Stop if activation fails; otherwise the java/python/pip commands that
# follow would silently run against whatever environment is currently active.
if ! conda activate "${CONDA_ENV_NAME}"; then
  echo "Error: Failed to activate conda environment '${CONDA_ENV_NAME}'" >&2
  exit 1
fi
# Create the InterProScan working directory and move into it.
echo "Setting up InterProScan ${IPS_VERSION}..."
mkdir -p interproscan
cd interproscan || exit 1

# Fetch the release archive and its published MD5 file. -nc leaves files that
# already exist untouched, so a re-run does not download them again.
echo "Downloading InterProScan..."
wget -nc "${IPS_URL}"
wget -nc "${IPS_URL}.md5"

# Distinguish "nothing was downloaded" from "downloaded but corrupt": both
# files have to be present before the checksum comparison means anything.
if [[ ! -f "${IPS_TAR}" || ! -f "${IPS_TAR}.md5" ]]; then
  echo "ERROR: Failed to download ${IPS_TAR} or ${IPS_TAR}.md5" >&2
  exit 1
fi

# Compare the archive against the checksum listed in the .md5 file.
echo "Verifying download integrity..."
if ! md5sum -c "${IPS_TAR}.md5"; then
  echo "ERROR: MD5 checksum verification failed!" >&2
  echo "The downloaded file may be corrupted. Please try downloading again." >&2
  # Because of -nc above, simply re-running would reuse the same bad file,
  # so tell the user what has to be removed first.
  echo "Remove '$(pwd)/${IPS_TAR}' and '$(pwd)/${IPS_TAR}.md5' before re-running this script." >&2
  exit 1
fi

# Unpack the verified archive; a failed or partial extraction is fatal.
echo "Extracting InterProScan..."
if ! tar -xzf "${IPS_TAR}"; then
  echo "ERROR: Failed to extract ${IPS_TAR}" >&2
  exit 1
fi
# Confirm that the `java` found on PATH reports an 11.x version.
echo "Checking Java environment in conda env..."
# Read the quoted version from the line that contains `version "` instead of
# blindly using line 1: when JAVA_TOOL_OPTIONS is set, the JVM prints a
# "Picked up JAVA_TOOL_OPTIONS: ..." notice before the version line.
JAVA_VER=$(java -version 2>&1 | awk -F '"' '/version "/ {print $2; exit}')
# Accept "11.x.y" as well as a bare "11" (or "11+28" / "11-ea" style) string.
# The pattern lives in a variable so bash treats it as a regex verbatim.
java11_re='^11([.+-]|$)'
if [[ "$JAVA_VER" =~ $java11_re ]]; then
  echo "Found compatible Java version in conda env: $JAVA_VER"
else
  echo "Error: Java version in conda env is not 11.x (found: $JAVA_VER)" >&2
  exit 1
fi
# Run the setup.py shipped inside the extracted InterProScan release.
echo "Running InterProScan setup..."
cd "${IPS_DIR}" || exit 1
# Only announce success when setup.py itself succeeded.
if ! python setup.py -f interproscan.properties; then
  echo "Error: InterProScan setup failed" >&2
  exit 1
fi
echo ""
echo "InterProScan installation completed in conda environment '${CONDA_ENV_NAME}'!"
echo "To use InterProScan, first activate the conda environment:"
echo "conda activate ${CONDA_ENV_NAME}"
echo "Then add InterProScan to your PATH:"
# $(pwd) is expanded now (the release directory); \$PATH is left literal so
# the printed line can be pasted into a shell as-is.
echo "export PATH=\$PATH:$(pwd)"
echo "You may also need to set INTERPROSCAN_HOME=$(pwd)"
# Step back out of the release directory; the following steps run from its
# parent directory.
cd ../ || exit 1
# Install Biopython with pip (needed for BLAST support).
echo "Installing Biopython for BLAST support..."
# Report completion only if pip succeeded; otherwise stop with an error.
if ! pip install biopython; then
  echo "Error: Failed to install Biopython" >&2
  exit 1
fi
echo "Biopython installation completed."
# Install BLAST from bioconda, then download UniProt Swiss-Prot and turn it
# into a local protein BLAST database inside ./blast_db.
echo "Installing BLAST from bioconda..."
conda config --add channels bioconda
conda config --add channels conda-forge
if ! conda install -c bioconda blast=2.16.0 -y; then
  echo "Error: Failed to install BLAST" >&2
  exit 1
fi

mkdir -p blast_db
cd blast_db || exit 1

# -N only re-downloads when the remote file is newer than the local copy.
echo "Downloading UniProt SwissProt database..."
wget --quiet --show-progress -N https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
if [[ ! -f "uniprot_sprot.fasta.gz" ]]; then
  echo "Error: Failed to download database!" >&2
  exit 1
fi

# -k keeps the .gz next to the extracted file. -f overwrites a
# uniprot_sprot.fasta left behind by an earlier run; without it gunzip
# refuses to overwrite and the stale file would pass the existence check.
echo "Decompressing database..."
if ! gunzip -kf uniprot_sprot.fasta.gz || [[ ! -f "uniprot_sprot.fasta" ]]; then
  echo "Error: Failed to decompress database!" >&2
  exit 1
fi

# Check makeblastdb's exit status as well as the output file: a .phr from a
# previous run must not hide a failed rebuild.
echo "Creating BLAST database..."
if ! makeblastdb -in uniprot_sprot.fasta -dbtype prot -out uniprot_swissprot -parse_seqids -title "UniProt SwissProt" \
  || [[ ! -f "uniprot_swissprot.phr" ]]; then
  echo "Error: BLAST database files not created!" >&2
  exit 1
fi
echo "BLAST database created successfully."
echo "You can now use it with: blastp -db uniprot_swissprot -query your_file.fasta"

# Assignment and export are separate statements so a failing $(pwd) is not
# masked by `export`. The variable only exists for this script's process and
# its children, hence the hint about the shell rc file.
BLASTDB=$(pwd)
export BLASTDB
echo "BLASTDB environment variable set to: $BLASTDB"
echo "please add <export BLASTDB=$(pwd)> to your .bashrc or .zshrc file for persistent use."
# Last step: install the remaining Python libraries with pip in one call.
echo "Installing required Python packages..."
python_packages=(openai gradio torch)
pip install "${python_packages[@]}"