Skip to content

Commit

Permalink
Merge pull request #108 from RIVM-bioinformatics/main
Browse files Browse the repository at this point in the history
chore: sync main to dev
  • Loading branch information
florianzwagemaker authored Oct 8, 2024
2 parents 6553973 + a6d8efb commit 379ffc9
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 38 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## [1.4.6](https://github.com/RIVM-bioinformatics/ViroConstrictor/compare/v1.4.5...v1.4.6) (2024-10-08)


### Bug Fixes

* properly solve DAG workflow for nonsegmented matched-ref samples ([02a821a](https://github.com/RIVM-bioinformatics/ViroConstrictor/commit/02a821a44c3ed3741c65825789ef25ad3e2093c1))

## [1.4.5](https://github.com/RIVM-bioinformatics/ViroConstrictor/compare/v1.4.4...v1.4.5) (2024-09-25)


Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ authors:
National Institute for Public Health and the
Environment (RIVM)
- name: "The RIVM-IDS Bioinformatics team"
version: 1.4.5 #x-release-please-version
version: 1.4.6 #x-release-please-version
doi: 10.5281/zenodo.7688035
identifiers:
- type: doi
Expand Down
2 changes: 1 addition & 1 deletion ViroConstrictor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Pre-commit value as shown on the removed side of this diff; the assignment
# that follows supersedes it.
__version__ = "1.4.5"
# Package version after this commit (matches the 1.4.6 changelog entry).
__version__ = "1.4.6"
# Program name string (presumably used for display/CLI output; confirm against callers).
__prog__ = "ViroConstrictor"
72 changes: 41 additions & 31 deletions ViroConstrictor/workflow/scripts/amplicon_covs.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,37 +147,39 @@ def remove_alt_primer_r(df):


def Find_NonOverlap(df):
    """Add the non-overlapping region of every amplicon to ``df``.

    Rows are processed in their existing order. For each row the region is
    computed as follows:

    * start: the ``rightstart`` of the previous row, or the row's own
      ``leftend`` when it is the first row;
    * end: the ``leftend`` of the next row when that value lies in the
      half-open interval ``[start, rightstart)``, otherwise the row's own
      ``rightstart``.

    Args:
        df: DataFrame with at least the columns ``name``, ``leftend`` and
            ``rightstart``.

    Returns:
        ``df`` inner-merged on ``name`` with two extra columns,
        ``unique_start`` and ``unique_end``. For an empty ``df`` an empty
        DataFrame with the fixed columns ``name``, ``leftstart``,
        ``leftend``, ``rightstart``, ``rightend``, ``unique_start`` and
        ``unique_end`` is returned.
    """
    # Guard clause: nothing to compute, hand back an empty frame that still
    # carries the columns downstream code selects on.
    if df.empty:
        return pd.DataFrame(columns=["name", "leftstart", "leftend", "rightstart", "rightend", "unique_start", "unique_end"])

    records = df.to_dict(orient="records")
    last = len(records) - 1

    # NOTE(review): both lookups are keyed by *position*, so when two rows end
    # up with the same start (or end) only the last row's name survives and the
    # inner merges below drop the earlier row. Kept as-is; confirm this is the
    # intended behaviour.
    name_by_start = {}
    name_by_end = {}

    for pos, row in enumerate(records):
        own_end = row.get("rightstart")
        if pos > 0:
            region_start = records[pos - 1].get("rightstart")
        else:
            region_start = row.get("leftend")
        next_leftend = records[pos + 1].get("leftend") if pos < last else None

        # Chained comparison instead of ``in range(...)``: same result for
        # integer coordinates, but does not raise on non-integer values.
        if next_leftend is not None and region_start <= next_leftend < own_end:
            region_end = next_leftend
        else:
            region_end = own_end

        name_by_start[region_start] = row.get("name")
        name_by_end[region_end] = row.get("name")

    start_df = pd.DataFrame(
        {
            "unique_start": list(name_by_start.keys()),
            "name": list(name_by_start.values()),
        }
    )
    end_df = pd.DataFrame(
        {
            "unique_end": list(name_by_end.keys()),
            "name": list(name_by_end.values()),
        }
    )

    merged = pd.merge(df, start_df, on="name", how="inner")
    merged = pd.merge(merged, end_df, on="name", how="inner")
    return merged


def avg(lst):
Expand Down Expand Up @@ -251,6 +253,14 @@ def pad_name(name):
lf = remove_alt_primer_l(remove_alt_keyword(lf))
rf = remove_alt_primer_r(remove_alt_keyword(rf))

# if either lf or rf is empty, write a placeholder csv and exit
# the csv holds a single row with one empty value; index and header are both disabled, so flags.key itself is not written
if len(lf) == 0 or len(rf) == 0:
df = pd.DataFrame({flags.key: [None]})
print(df)
df.to_csv(flags.output, sep=",", index=False, header=False)
sys.exit(0)

non_overlapping_points = Find_NonOverlap(
pd.merge(lf, rf, on="name", how="inner")
.rename(
Expand Down
19 changes: 14 additions & 5 deletions ViroConstrictor/workflow/workflow.smk
Original file line number Diff line number Diff line change
Expand Up @@ -702,19 +702,28 @@ def group_aminoacids_inputs(wildcards):
select_samples = list(
samples_df.loc[samples_df["Virus"] == i]["sample"].unique()
)
select_refIDs = list(samples_df.loc[samples_df["Virus"] == i]["RefID"].unique())
# for x in select_samples:
# y = samples_df.loc[(samples_df["Virus"] == i) & (samples_df["sample"] == x)]["RefID"].unique()
# print(y)
# select_refIDs = list(samples_df.loc[samples_df["Virus"] == i]["RefID"].unique())
# print(select_refIDs)

# create a dictionary of dictionaries for each virus, with 'i' as the primary key and sample as the secondary key having a list of refIDs as the value
struct[i] = {sample: select_refIDs for sample in select_samples}

struct[i] = {
sample: list(
samples_df.loc[
(samples_df["Virus"] == i) & (samples_df["sample"] == sample)
]["RefID"].unique()
)
for sample in select_samples
}
file_list = []
for virus, sample in struct.items():
for sample, refid in sample.items():
for ref in refid:
file_list.append(
f"{datadir}Virus~{virus}/RefID~{ref}/{amino}{sample}/aa.faa"
)

return file_list


Expand Down Expand Up @@ -823,7 +832,7 @@ rule concat_boc:

rule calculate_amplicon_cov:
input:
pr=f"{datadir}{wc_folder}{prim}" "{sample}_removedprimers.bed",
pr=f"{datadir}{wc_folder}{prim}" "{sample}_primers.bed",
cov=rules.trueconsense.output.cov,
output:
f"{datadir}{wc_folder}{prim}" "{sample}_ampliconcoverage.csv",
Expand Down

0 comments on commit 379ffc9

Please sign in to comment.