Skip to content

Commit

Permalink
Remove python symmetrize from the SG graph creation
Browse files Browse the repository at this point in the history
  • Loading branch information
jnke2016 committed Sep 22, 2024
1 parent fb115da commit 47b0677
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 9 deletions.
4 changes: 4 additions & 0 deletions python/cugraph/cugraph/structure/graph_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ def from_cudf_edgelist(
This parameter is deprecated and will be removed.
symmetrize: bool, optional (default=True)
If True, symmetrize the edge list for an undirected graph. Setting
this flag to True for a directed graph raises a ValueError.
Examples
--------
>>> df = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def _make_plc_graph(
edge_id_type,
edge_type_id,
drop_multi_edges,
symmetrize
):
weights = None
edge_ids = None
Expand Down Expand Up @@ -151,6 +152,7 @@ def _make_plc_graph(
else ([cudf.Series(dtype=edge_type_id)] if edge_type_id else None),
num_arrays=num_arrays,
store_transposed=store_transposed,
symmetrize=symmetrize,
do_expensive_check=False,
drop_multi_edges=drop_multi_edges,
)
Expand Down Expand Up @@ -183,6 +185,11 @@ def __from_edgelist(
destination
].dtype not in [np.int32, np.int64]:
raise ValueError("set renumber to True for non integer columns ids")

if (self.properties.directed and symmetrize):
raise ValueError(
"The edgelist can only be symmetrized for undirected graphs."
)

s_col = source
d_col = destination
Expand Down Expand Up @@ -370,6 +377,7 @@ def __from_edgelist(
self.edge_id_type,
self.edge_type_id_type,
not self.properties.multi_edge,
not self.properties.directed
)
for w, edata in persisted_keys_d.items()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.structure import graph_primtypes_wrapper
#from cugraph.structure import graph_primtypes_wrapper *********
from cugraph.structure.replicate_edgelist import replicate_cudf_dataframe
from cugraph.structure.symmetrize import symmetrize
from cugraph.structure.symmetrize import symmetrize as symmetrize_
from cugraph.structure.number_map import NumberMap
import cugraph.dask.common.mg_utils as mg_utils
import cudf
Expand Down Expand Up @@ -134,6 +134,7 @@ def __from_edgelist(
renumber=True,
legacy_renum_only=False,
store_transposed=False,
symmetrize=False
):
if legacy_renum_only:
warning_msg = (
Expand All @@ -142,6 +143,11 @@ def __from_edgelist(
warnings.warn(
warning_msg,
)

if (self.properties.directed and symmetrize):
raise ValueError(
"The edgelist can only be symmetrized for undirected graphs."
)

# Verify column names present in input DataFrame
s_col = source
Expand Down Expand Up @@ -268,8 +274,9 @@ def __from_edgelist(
# otherwise the initial dataframe will be returned. Duplicated edges
# will be dropped unless the graph is a MultiGraph(Not Implemented yet)
# TODO: Update Symmetrize to work on Graph and/or DataFrame
"""
if edge_attr is not None:
source_col, dest_col, value_col = symmetrize(
source_col, dest_col, value_col = symmetrize_(
elist,
source,
destination,
Expand All @@ -285,7 +292,7 @@ def __from_edgelist(
value_col = value_dict
else:
value_col = None
source_col, dest_col = symmetrize(
source_col, dest_col = symmetrize_(
elist,
source,
destination,
Expand All @@ -304,6 +311,43 @@ def __from_edgelist(
self.edgelist = simpleGraphImpl.EdgeList(source_col, dest_col, value_col)
print("original edgelist = ", len(elist[source]), " symmetrize edgelist = ", len(source_col))
print("value_col = \n", value_col)
"""

#self.edgelist = simpleGraphImpl.EdgeList(elist[source], elist[destination], elist[weight])
print("\nelist = \n", elist.head())

#"""
if edge_attr is not None:
value_col = {
self.edgeWeightCol: elist[weight] if weight in edge_attr else None,
self.edgeIdCol: elist[edge_id] if edge_id in edge_attr else None,
self.edgeTypeCol: elist[edge_type]
if edge_type in edge_attr else None,
}

print("value_col = \n", value_col)
else:
value_col = None

# Store the edgelist as provided, without symmetrizing it.
# FIXME: if the user accesses self.edgelist after creating the graph, it should return
# the symmetrized edgelist when the graph is undirected or symmetrize=True (decompress).
self.edgelist = simpleGraphImpl.EdgeList(elist[source], elist[destination], value_col)

#print("value_col_df = \n", elist[weight])
#"""










if self.batch_enabled:
self._replicate_edgelist()

Expand All @@ -312,6 +356,7 @@ def __from_edgelist(
store_transposed=store_transposed,
renumber=renumber,
drop_multi_edges=not self.properties.multi_edge,
symmetrize=not self.properties.directed
)

def to_pandas_edgelist(
Expand Down Expand Up @@ -428,7 +473,7 @@ def view_edge_list(self):
then containing the weight value for each edge
"""
if self.edgelist is None:
src, dst, weights = graph_primtypes_wrapper.view_edge_list(self)
src, dst, weights = (None, None, None)#graph_primtypes_wrapper.view_edge_list(self) ****
self.edgelist = self.EdgeList(src, dst, weights)

srcCol = self.source_columns
Expand Down Expand Up @@ -555,7 +600,10 @@ def __from_adjlist(
if value_col is not None:
self.properties.weighted = True
self._make_plc_graph(
value_col=value_col, store_transposed=store_transposed, renumber=renumber
value_col=value_col,
store_transposed=store_transposed,
renumber=renumber,
symmetrize=not self.properties.directed
)

if self.batch_enabled:
Expand Down Expand Up @@ -596,7 +644,7 @@ def view_adj_list(self):
self.transposedadjlist.weights,
)
else:
off, ind, vals = graph_primtypes_wrapper.view_adj_list(self)
off, ind, vals = (None, None, None)#graph_primtypes_wrapper.view_adj_list(self) ****
self.adjlist = self.AdjList(off, ind, vals)

if self.batch_enabled:
Expand Down Expand Up @@ -643,7 +691,7 @@ def view_transposed_adj_list(self):
off,
ind,
vals,
) = graph_primtypes_wrapper.view_transposed_adj_list(self)
) = (None, None, None)#graph_primtypes_wrapper.view_transposed_adj_list(self) *******
self.transposedadjlist = self.transposedAdjList(off, ind, vals)

if self.batch_enabled:
Expand Down Expand Up @@ -1146,6 +1194,7 @@ def _make_plc_graph(
store_transposed: bool = False,
renumber: bool = True,
drop_multi_edges: bool = False,
symmetrize: bool = False
):
"""
Parameters
Expand All @@ -1164,6 +1213,8 @@ def _make_plc_graph(
int32 or int64 type.
drop_multi_edges: bool (default=False)
Whether to drop multi edges
symmetrize: bool (default=False)
Whether to symmetrize the edge list when creating the graph
"""

if value_col is None:
Expand Down Expand Up @@ -1214,7 +1265,12 @@ def _make_plc_graph(
"This may cause extra memory usage. Consider passing"
" a int64 list of edge ids instead."
)
df = cudf.DataFrame()

df["srcs"] = src_or_offset_array
df["dsts"] = dst_or_index_array
df["wgts"] = weight_col
print("df = \n", df)
self._plc_graph = SGGraph(
resource_handle=ResourceHandle(),
graph_properties=graph_props,
Expand All @@ -1228,6 +1284,7 @@ def _make_plc_graph(
do_expensive_check=True,
input_array_format=input_array_format,
drop_multi_edges=drop_multi_edges,
symmetrize=symmetrize
)

def to_directed(self, DiG, store_transposed=False):
Expand Down
16 changes: 15 additions & 1 deletion python/cugraph/cugraph/structure/symmetrize.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,12 +251,25 @@ def symmetrize(
>>> df['values'] = cudf.Series(M['2'])
>>> src, dst, val = symmetrize(df, 'sources', 'destinations', 'values', multi=True)
"""
print("multi = \n", multi)
return (
input_df[source_col_name],
input_df[dest_col_name],
input_df[value_col_name],
)




"""
warnings.warn(
"This method is deprecated and will no longer be supported. The symmetrization "
"of the edges are only supported by creating an undirected graph",
"of the edges are only supported by setting the 'symmetrize' flag to 'True'",
FutureWarning,
)
"""

"""
# FIXME: Redundant check that should be done at the graph creation
if "edge_id" in input_df.columns and symmetrize:
Expand Down Expand Up @@ -300,6 +313,7 @@ def symmetrize(
)
return output_df[source_col_name], output_df[dest_col_name]
"""


def _add_reverse_edges(df, src_name, dst_name, weight_name):
Expand Down

0 comments on commit 47b0677

Please sign in to comment.