Modules
Top-level package for bgcflow_wrapper.
bgcflow
¶
Main module.
cli
¶
Console script for bgcflow.
projects_util
¶
bgcflow_init(bgcflow_dir, global_config)
¶
Initialize the global config from the template, or list the projects in an existing config.
Source code in bgcflow/projects_util.py
def bgcflow_init(bgcflow_dir, global_config):
    """
    List the projects of an existing global config, or create the config.

    If `global_config` exists, it is parsed as YAML and every entry under its
    "projects" key is printed together with its samples file. Entries whose
    "name" ends with ".yaml" are loaded with `peppy.Project` (path resolved
    relative to `bgcflow_dir`) and report the PEP's name and its
    "sample_table" config value; all other entries report their "name" and
    "samples" values directly.

    If `global_config` does not exist, it is created by calling
    `generate_global_config`.

    Params:
    - bgcflow_dir: path object of the BGCFlow directory (must support `/`)
    - global_config: path object of the global config file

    Returns None.
    """
    # check if global config available
    if global_config.is_file():
        # grab available projects
        print(f"Found config file at: {global_config}")
        with open(global_config, "r") as file:
            config_yaml = yaml.safe_load(file)

        # map project name -> samples file
        list_of_projects = {}
        for project in config_yaml["projects"]:
            if project["name"].endswith(".yaml"):
                pep = peppy.Project(
                    str(bgcflow_dir / project["name"]), sample_table_index="genome_id"
                )
                name = pep.name
                file_path = pep.config["sample_table"]
            else:
                name = project["name"]
                file_path = project["samples"]
            list_of_projects[name] = file_path

        print("Available projects:")
        # Iterate over items so each project is shown with its OWN samples
        # file, not the value left over from the last loop iteration above.
        for name, file_path in list_of_projects.items():
            print(f" - {name} : {file_path}")
    else:
        generate_global_config(bgcflow_dir, global_config)

    print("Do a test run by: `bgcflow run -n`")

    return
generate_global_config(bgcflow_dir, global_config)
¶
Copy config.yaml from template to config directory
Source code in bgcflow/projects_util.py
def generate_global_config(bgcflow_dir, global_config):
    """
    Create the global config by copying the example config template.

    The template is read from `.examples/_config_example.yaml` inside
    `bgcflow_dir` and copied to `global_config`.

    Params:
    - bgcflow_dir: path object of the BGCFlow directory (must support `/`)
    - global_config: destination path of the config file

    Raises AssertionError when the template file does not exist.
    Returns None.
    """
    print(f"Generating config file from template at: {global_config}")

    source_template = bgcflow_dir / ".examples" / "_config_example.yaml"
    template_found = source_template.is_file()
    assert (
        template_found
    ), "Cannot find template file. Are you using BGCFlow version >= 0.4.1?"

    shutil.copy(source_template, global_config)
generate_project(bgcflow_dir, project_name, pep_version='2.1.0', use_project_rules=False, samples_csv=False, prokka_db=False, gtdb_tax=False, description=False)
¶
Generate a PEP project and register it in the BGCFlow global config file.
Source code in bgcflow/projects_util.py
def _copy_project_file(source, destination):
    """Copy the existing file `source` to `destination`; assert it exists."""
    source = Path(source)
    assert source.is_file(), f"File not found: {source}"
    shutil.copy(source, destination)


def generate_project(
    bgcflow_dir,
    project_name,
    pep_version="2.1.0",
    use_project_rules=False,
    samples_csv=False,
    prokka_db=False,
    gtdb_tax=False,
    description=False,
):
    """
    Generate a PEP project and register it in the BGCFlow global config.

    Creates `config/<project_name>/` inside `bgcflow_dir`, writes a
    `project_config.yaml` there (plus any provided input files), and appends
    the path of that project config to the "projects" list of
    `config/config.yaml`. The global config is created from the template
    first if it does not exist yet.

    Params:
    - bgcflow_dir: BGCFlow directory (str or path object)
    - project_name: name of the project; also the project folder name
    - pep_version: value written to the "pep_version" key
    - use_project_rules: if truthy, read `workflow/rules.yaml` and add a
      "rules" mapping with every rule set to "FALSE"
    - samples_csv: a pandas DataFrame indexed by "genome_id" (written as
      `samples.csv`), or a str/Path to an existing file (copied as
      `samples.csv`); any other value is ignored
    - prokka_db: str/Path to a file copied as `prokka-db.csv`
    - gtdb_tax: str/Path to a file copied as `gtdbtk.bac120.summary.tsv`
    - description: str written to the "description" key

    Raises AssertionError when the project is already registered, when an
    input file does not exist, or when the samples DataFrame does not have
    the expected index name and columns.
    Returns None.
    """
    # Path() accepts both str and Path instances, so no type check is needed.
    bgcflow_dir = Path(bgcflow_dir)
    global_config = bgcflow_dir / "config/config.yaml"
    project_dir = bgcflow_dir / f"config/{project_name}"
    project_config = project_dir / "project_config.yaml"

    template_dict = {
        "name": project_name,
        "pep_version": pep_version,
        "description": "<TO DO: give a description to your project>",
        "sample_table": "samples.csv",
        "prokka-db": "OPTIONAL: relative path to your `prokka-db.csv`",
        "gtdb-tax": "OPTIONAL: relative path to your `gtdbtk.bac120.summary.tsv`",
    }

    if use_project_rules:
        with open(bgcflow_dir / "workflow/rules.yaml", "r") as file:
            available_rules = yaml.safe_load(file)
        template_dict["rules"] = {rule: "FALSE" for rule in available_rules}

    # Load the global config and reject duplicates BEFORE touching any file,
    # so a rejected call cannot overwrite the files of an existing project.
    if not global_config.is_file():
        global_config.parent.mkdir(parents=True, exist_ok=True)
        bgcflow_init(bgcflow_dir, global_config)
    with open(global_config, "r") as file:
        main_config = yaml.safe_load(file) or {}
    # An empty "projects:" key loads as None; treat it as an empty list.
    projects = main_config.get("projects") or []
    project_names = [p["name"] for p in projects]
    assert (
        project_name not in project_names
    ), f"Project name: '{project_name}' already exists!\nUse a different name or edit the files in: {project_dir}"
    assert (
        str(project_config) not in project_names
    ), f"Project name: '{project_name}' already exists!\nUse a different name or edit the files in: {project_dir}"

    project_dir.mkdir(parents=True, exist_ok=True)

    if isinstance(samples_csv, pd.DataFrame):
        print("Generating samples file from Pandas DataFrame")
        assert (
            samples_csv.index.name == "genome_id"
        ), "Samples DataFrame must be indexed by 'genome_id'"
        expected_columns = [
            "source",
            "organism",
            "genus",
            "species",
            "strain",
            "closest_placement_reference",
        ]
        # Compare as plain lists: this yields a single bool regardless of the
        # number of columns, so the check is actually enforced.
        assert (
            list(samples_csv.columns) == expected_columns
        ), f"Samples DataFrame columns must be exactly: {expected_columns}"
        samples_csv.to_csv(project_dir / "samples.csv")
    elif isinstance(samples_csv, (str, Path)):
        print(f"Copying samples file from {samples_csv}")
        _copy_project_file(samples_csv, project_dir / "samples.csv")

    if isinstance(prokka_db, (str, Path)):
        print(f"Copying custom annotation file from {prokka_db}")
        _copy_project_file(prokka_db, project_dir / "prokka-db.csv")
        template_dict["prokka-db"] = "prokka-db.csv"

    if isinstance(gtdb_tax, (str, Path)):
        print(f"Copying custom taxonomy from {gtdb_tax}")
        _copy_project_file(gtdb_tax, project_dir / "gtdbtk.bac120.summary.tsv")
        template_dict["gtdb-tax"] = "gtdbtk.bac120.summary.tsv"

    if isinstance(description, str):
        print("Writing project description...")
        template_dict["description"] = description

    print(f"Project config file generated in: {project_dir}")
    with open(project_config, "w") as file:
        yaml.dump(template_dict, file, sort_keys=False)

    print("Updating global config.yaml")
    projects.append({"name": str(project_config)})
    main_config["projects"] = projects
    with open(global_config, "w") as file:
        yaml.dump(main_config, file, sort_keys=False)

    return